From 99048e0294f2638f7bde677f386f12b68aa3948f Mon Sep 17 00:00:00 2001 From: cclauss Date: Sun, 15 Apr 2018 13:32:18 +0200 Subject: [PATCH 001/291] Flake8 testing finds undefined names --- .travis.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c72bdbf0..100ed8c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,12 @@ python: - "3.6" install: - pip install -r requirements.txt - - pip install codeclimate-test-reporter + - pip install codeclimate-test-reporter flake8 +before_script: + # stop the build if there are Python syntax errors or undefined names + - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics script: - python -m tests - coverage run -m tests From 66f7d3f7f60bd1bced32b35bc2c07ffc18cdf6bd Mon Sep 17 00:00:00 2001 From: KevinHock Date: Mon, 16 Apr 2018 20:54:43 -0700 Subject: [PATCH 002/291] Changed python commands to python3 to be more explicit --- README.rst | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 8f502650..89b0382e 100644 --- a/README.rst +++ b/README.rst @@ -33,8 +33,6 @@ Features * Search GitHub and analyse hits with PyT -* Scan intraprocedural or interprocedural - * A lot of customisation possible Example usage and output: @@ -46,20 +44,20 @@ Install 1. git clone https://github.com/python-security/pyt.git 2. cd pyt/ - 3. python setup.py install + 3. python3 setup.py install 4. 
pyt -h Usage from Source ================= -Using it like a user ``python -m pyt -f example/vulnerable_code/XSS_call.py save -du`` +Using it like a user ``python3 -m pyt -f example/vulnerable_code/XSS_call.py save -du`` -Running the tests ``python -m tests`` +Running the tests ``python3 -m tests`` -Running an individual test file ``python -m unittest tests.import_test`` +Running an individual test file ``python3 -m unittest tests.import_test`` -Running an individual test ``python -m unittest tests.import_test.ImportTest.test_import`` +Running an individual test ``python3 -m unittest tests.import_test.ImportTest.test_import`` Contributions From b98bf50e45d4e441fcec279116fc6111013eeb9b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 16 Apr 2018 21:07:23 -0700 Subject: [PATCH 003/291] Combine flask and django trigger words, make it the default --- pyt/argument_helpers.py | 2 +- .../all_trigger_words.pyt | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 pyt/vulnerability_definitions/all_trigger_words.pyt diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index a2ecee35..847636f9 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -15,7 +15,7 @@ default_trigger_word_file = os.path.join( os.path.dirname(__file__), 'vulnerability_definitions', - 'flask_trigger_words.pyt' + 'all_trigger_words.pyt' ) diff --git a/pyt/vulnerability_definitions/all_trigger_words.pyt b/pyt/vulnerability_definitions/all_trigger_words.pyt new file mode 100644 index 00000000..656c8386 --- /dev/null +++ b/pyt/vulnerability_definitions/all_trigger_words.pyt @@ -0,0 +1,34 @@ +sources: +request.args.get( +Markup( +POST.get( +GET.get( +META.get( +POST[ +GET[ +META[ +FILES[ +.data +form[ +form( +mark_safe( +cookies[ +files[ +SQLAlchemy + +sinks: +replace( -> escape +send_file( -> '..', '..' 
in +execute( +system( +filter( +subprocess.call( +render_template( +set_cookie( +redirect( +url_for( +flash( +jsonify( +render( +render_to_response( +Popen( \ No newline at end of file From c2a4e67dbee2d7acc7638d321b499101b610ee24 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 17 Apr 2018 09:34:41 -0700 Subject: [PATCH 004/291] Fix Ifatty line_number arg RaiseNode bug --- pyt/node_types.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyt/node_types.py b/pyt/node_types.py index 8451e9c3..9d194bcf 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -138,14 +138,13 @@ def __init__(self, label): class RaiseNode(Node, ConnectToExitNode): """CFG Node that represents a Raise statement.""" - def __init__(self, ast_node, *, line_number, path): + def __init__(self, ast_node, *, path): label_visitor = LabelVisitor() label_visitor.visit(ast_node) super().__init__( label_visitor.result, ast_node, - line_number=line_number, path=path ) From b3110b3e99c9aed855a242f74594306eec8dcf24 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 17 Apr 2018 09:44:39 -0700 Subject: [PATCH 005/291] Fix the foddy infinite loop --- pyt/stmt_visitor_helper.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pyt/stmt_visitor_helper.py b/pyt/stmt_visitor_helper.py index 21f1f666..315c3332 100644 --- a/pyt/stmt_visitor_helper.py +++ b/pyt/stmt_visitor_helper.py @@ -1,4 +1,5 @@ import ast +import random from collections import namedtuple from .node_types import ( @@ -101,16 +102,24 @@ def get_first_node( node, node_not_to_step_past ): + """ + This is a super hacky way of getting the first node after a statement. + We do this because we visit a statement and keep on visiting and get something in return that is rarely the first node. + So we loop and loop backwards until we hit the statement or there is nothing to step back to. 
+ """ ingoing = None + i = 0 current_node = node while current_node.ingoing: + # This is used because there may be multiple ingoing and loop will cause an infinite loop if we did [0] + i = random.randrange(len(current_node.ingoing)) # e.g. We don't want to step past the Except of an Except basic block - if current_node.ingoing[0] == node_not_to_step_past: + if current_node.ingoing[i] == node_not_to_step_past: break ingoing = current_node.ingoing - current_node = current_node.ingoing[0] + current_node = current_node.ingoing[i] if ingoing: - return ingoing[0] + return ingoing[i] return current_node From 054b70611579d28e6dcef4c9e1c4356e42a698d5 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 18 Apr 2018 19:14:55 -0700 Subject: [PATCH 006/291] Skip making self a TaintedNode, vuln test for it, flake8 vuln_test, 88->89% --- .pre-commit-config.yaml | 2 +- .../def_with_self_as_first_arg.py | 4 ++ pyt/framework_adaptor.py | 31 +++++++++---- pyt/node_types.py | 4 ++ tests/vulnerabilities_test.py | 46 ++++++++++++++++--- tox.ini | 2 +- 6 files changed, 70 insertions(+), 19 deletions(-) create mode 100644 examples/example_inputs/def_with_self_as_first_arg.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ef0f176..33a48e76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,4 +8,4 @@ - id: check-ast - id: check-symlinks - id: flake8 - args: ['--ignore=E501'] + args: ['--exclude=examples/*', '--ignore=E501,E741'] diff --git a/examples/example_inputs/def_with_self_as_first_arg.py b/examples/example_inputs/def_with_self_as_first_arg.py new file mode 100644 index 00000000..0c244455 --- /dev/null +++ b/examples/example_inputs/def_with_self_as_first_arg.py @@ -0,0 +1,4 @@ + + +def my_data(self, foo, bar): + return redirect(self.something) diff --git a/pyt/framework_adaptor.py b/pyt/framework_adaptor.py index d602a92f..c7e5119d 100644 --- a/pyt/framework_adaptor.py +++ b/pyt/framework_adaptor.py @@ -5,7 +5,10 @@ from .ast_helper import 
Arguments from .expr_visitor import make_cfg from .module_definitions import project_definitions -from .node_types import TaintedNode +from .node_types import ( + AssignmentNode, + TaintedNode +) class FrameworkAdaptor(): @@ -37,19 +40,27 @@ def get_func_cfg_with_tainted_args(self, definition): first_node_after_args = func_cfg.nodes[1] first_node_after_args.ingoing = list() - # We're just gonna give all the tainted args the lineno of the def + # We are just going to give all the tainted args the lineno of the def definition_lineno = definition.node.lineno # Taint all the arguments - for arg in args: - tainted_node = TaintedNode(arg, arg, - None, [], - line_number=definition_lineno, - path=definition.path) - function_entry_node.connect(tainted_node) + for i, arg in enumerate(args): + node_type = TaintedNode + if i == 0 and arg == 'self': + node_type = AssignmentNode + + arg_node = node_type( + label=arg, + left_hand_side=arg, + ast_node=None, + right_hand_side_variables=[], + line_number=definition_lineno, + path=definition.path + ) + function_entry_node.connect(arg_node) # 1 and not 0 so that Entry Node remains first in the list - func_cfg.nodes.insert(1, tainted_node) - tainted_node.connect(first_node_after_args) + func_cfg.nodes.insert(1, arg_node) + arg_node.connect(first_node_after_args) return func_cfg diff --git a/pyt/node_types.py b/pyt/node_types.py index 9d194bcf..3819963a 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -176,6 +176,10 @@ def __repr__(self): class TaintedNode(AssignmentNode): + """CFG Node that represents a tainted node. 
+ + Only created in framework_adaptor.py and only used in `identify_triggers` of vulnerabilities.py + """ pass diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index fb3c00f2..f3d77279 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -15,9 +15,10 @@ from pyt.constraint_table import initialize_constraint_table from pyt.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import( +from pyt.framework_helper import ( is_django_view_function, - is_flask_route_function + is_flask_route_function, + is_function ) from pyt.node_types import Node from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis @@ -95,17 +96,15 @@ def test_find_triggers(self): l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words) self.assert_length(l, expected_length=1) - def test_find_sanitiser_nodes(self): cfg_node = Node(None, None, line_number=None, path=None) - sanitiser_tuple = vulnerabilities.Sanitiser('escape', cfg_node) + sanitiser_tuple = vulnerabilities.Sanitiser('escape', cfg_node) sanitiser = 'escape' result = list(vulnerabilities.find_sanitiser_nodes(sanitiser, [sanitiser_tuple])) self.assert_length(result, expected_length=1) self.assertEqual(result[0], cfg_node) - def test_build_sanitiser_node_dict(self): self.cfg_create_from_file('examples/vulnerable_code/XSS_sanitised.py') cfg_list = [self.cfg] @@ -114,7 +113,7 @@ def test_build_sanitiser_node_dict(self): cfg = cfg_list[1] - cfg_node = Node(None, None, line_number=None, path=None) + cfg_node = Node(None, None, line_number=None, path=None) sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node)] sanitiser_dict = vulnerabilities.build_sanitiser_node_dict(cfg, sinks_in_file) @@ -142,7 +141,6 @@ def run_analysis(self, path): ) ) - def test_find_vulnerabilities_assign_other_var(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_assign_to_other_var.py') 
self.assert_length(vulnerabilities, expected_length=1) @@ -555,3 +553,37 @@ def test_django_view_param(self): ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + +class EngineEveryTest(BaseTestCase): + def run_empty(self): + return + + def run_analysis(self, path): + self.cfg_create_from_file(path) + cfg_list = [self.cfg] + + FrameworkAdaptor(cfg_list, [], [], is_function) + initialize_constraint_table(cfg_list) + + analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + + trigger_word_file = os.path.join( + 'pyt', + 'vulnerability_definitions', + 'all_trigger_words.pyt' + ) + + return vulnerabilities.find_vulnerabilities( + cfg_list, + ReachingDefinitionsTaintAnalysis, + UImode.NORMAL, + VulnerabilityFiles( + default_blackbox_mapping_file, + trigger_word_file + ) + ) + + def test_self_is_not_tainted(self): + vulnerabilities = self.run_analysis('examples/example_inputs/def_with_self_as_first_arg.py') + self.assert_length(vulnerabilities, expected_length=0) diff --git a/tox.ini b/tox.ini index 933a1460..70393146 100644 --- a/tox.ini +++ b/tox.ini @@ -7,5 +7,5 @@ deps = -rrequirements-dev.txt commands = coverage erase coverage run tests - coverage report --show-missing --fail-under 88 + coverage report --show-missing --fail-under 89 pre-commit run From 84f7eac80ecb23b021ba67b804f91f231a227f8d Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 18 Apr 2018 19:22:23 -0700 Subject: [PATCH 007/291] Missed a spot (python -> python3) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 89b0382e..9bd41255 100644 --- a/README.rst +++ b/README.rst @@ -89,7 +89,7 @@ Create the virtual environment Check that you have the right versions -``python --version`` sample output ``Python 3.6.0`` +``python3 --version`` sample output ``Python 3.6.0`` ``pip --version`` sample output ``pip 9.0.1 
from /Users/kevinhock/a_folder/lib/python3.6/site-packages (python 3.6)`` From 101f3204ccba6bc76b0d216258022eac93557962 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 18 Apr 2018 20:15:09 -0700 Subject: [PATCH 008/291] Our first CHANGELOG.md, added Unreleased section --- CHANGELOG.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..328cfd3e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,55 @@ +# What's New + +Thanks to all our contributors, users, and the many people that make PyT possible! :heart: + +If you love PyT, please star our project on GitHub to show your support! :star: + + + +# Unreleased +##### April 18, 2018 + +#### :tada: New Features + +* Baseline support by in ([#106], thanks [@omergunal]) + +[#106]: https://github.com/python-security/pyt/pull/106 +[@omergunal]: https://github.com/omergunal + +#### :sparkles: Usability +* Combined all source/sink files and made it the default (#116) + +#### :telescope: Precision +* Fixed a bug where "Post.query.paginate" progated taint (#115) +* Fixed a false-positive where `self` was marked as taint (#119) + +#### :bug: Bugfixes +* Fixed a bug where `visit_Raise` raised a `TypeError` (#117) +* Fixed an infinite loop bug that was caused while handling certain loops (#118) + +#### :snake: Miscellaneous + +* Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) (#110, #111) + +[#106]: https://github.com/python-security/pyt/pull/116 +[#106]: https://github.com/python-security/pyt/pull/115 +[#106]: https://github.com/python-security/pyt/pull/119 +[#106]: https://github.com/python-security/pyt/pull/117 +[#106]: https://github.com/python-security/pyt/pull/118 +[#106]: https://github.com/python-security/pyt/pull/111 +[#106]: https://github.com/python-security/pyt/pull/110 From 9aaa926b11eea6cc14615c36db81246baebe8fc9 Mon Sep 
17 00:00:00 2001 From: Kevin Hock Date: Wed, 18 Apr 2018 20:17:05 -0700 Subject: [PATCH 009/291] Missing square brackets --- CHANGELOG.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 328cfd3e..b89d349d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,19 +32,19 @@ If you love PyT, please star our project on GitHub to show your support! :star: [@omergunal]: https://github.com/omergunal #### :sparkles: Usability -* Combined all source/sink files and made it the default (#116) +* Combined all source/sink files and made it the default ([#116]) #### :telescope: Precision -* Fixed a bug where "Post.query.paginate" progated taint (#115) -* Fixed a false-positive where `self` was marked as taint (#119) +* Fixed a bug where "Post.query.paginate" progated taint ([#115]) +* Fixed a false-positive where `self` was marked as taint ([#119]) #### :bug: Bugfixes -* Fixed a bug where `visit_Raise` raised a `TypeError` (#117) -* Fixed an infinite loop bug that was caused while handling certain loops (#118) +* Fixed a bug where `visit_Raise` raised a `TypeError` ([#117]) +* Fixed an infinite loop bug that was caused while handling certain loops ([#118]) #### :snake: Miscellaneous -* Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) (#110, #111) +* Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) ([#110], [#111]) [#106]: https://github.com/python-security/pyt/pull/116 [#106]: https://github.com/python-security/pyt/pull/115 From 4bf72b0e0328a1840549c6bf8ceff6f7c88a301d Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 18 Apr 2018 20:18:52 -0700 Subject: [PATCH 010/291] I should go to sleep now :D --- CHANGELOG.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b89d349d..0219c59b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,10 +46,10 @@ If 
you love PyT, please star our project on GitHub to show your support! :star: * Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) ([#110], [#111]) -[#106]: https://github.com/python-security/pyt/pull/116 -[#106]: https://github.com/python-security/pyt/pull/115 -[#106]: https://github.com/python-security/pyt/pull/119 -[#106]: https://github.com/python-security/pyt/pull/117 -[#106]: https://github.com/python-security/pyt/pull/118 -[#106]: https://github.com/python-security/pyt/pull/111 -[#106]: https://github.com/python-security/pyt/pull/110 +[#116]: https://github.com/python-security/pyt/pull/116 +[#115]: https://github.com/python-security/pyt/pull/115 +[#119]: https://github.com/python-security/pyt/pull/119 +[#117]: https://github.com/python-security/pyt/pull/117 +[#118]: https://github.com/python-security/pyt/pull/118 +[#111]: https://github.com/python-security/pyt/pull/111 +[#110]: https://github.com/python-security/pyt/pull/110 From 45f848362eaf1b494b7150a5fe2b3643818acd25 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 18 Apr 2018 20:20:39 -0700 Subject: [PATCH 011/291] I should go to sleep now :D --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0219c59b..b9418124 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,10 +32,10 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: [@omergunal]: https://github.com/omergunal #### :sparkles: Usability -* Combined all source/sink files and made it the default ([#116]) +* Combined all source/sink information files and made it the default ([#116]) #### :telescope: Precision -* Fixed a bug where "Post.query.paginate" progated taint ([#115]) +* Fixed a bug where `Post.query.paginate` propagated taint ([#115]) * Fixed a false-positive where `self` was marked as taint ([#119]) #### :bug: Bugfixes From c15a10c496d056f190a3c98da04750747057f2f8 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 18 Apr 2018 20:42:29 -0700 Subject: [PATCH 012/291] Typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9418124..ba1451e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :tada: New Features -* Baseline support by in ([#106], thanks [@omergunal]) +* Baseline support ([#106], thanks [@omergunal]) [#106]: https://github.com/python-security/pyt/pull/106 [@omergunal]: https://github.com/omergunal From 3f97f9923748b993033f3da242ffeb84d4acd53a Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 19 Apr 2018 18:18:06 -0700 Subject: [PATCH 013/291] Added thanks @lfatty --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba1451e4..18e840a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :boom: Breaking Changes #### :tada: New Features #### :sparkles: Usability +#### :mortar_board: Walkthrough / Help #### :telescope: Precision #### :bug: Bugfixes #### :snake: Miscellaneous @@ -36,10 +37,10 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: #### :telescope: Precision * Fixed a bug where `Post.query.paginate` propagated taint ([#115]) -* Fixed a false-positive where `self` was marked as taint ([#119]) +* Fixed a false-positive where `self` was marked as taint ([#119], thanks [@lFatty]) #### :bug: Bugfixes -* Fixed a bug where `visit_Raise` raised a `TypeError` ([#117]) +* Fixed a bug where `visit_Raise` raised a `TypeError` ([#117], thanks [@lFatty]) * Fixed an infinite loop bug that was caused while handling certain loops ([#118]) #### :snake: Miscellaneous @@ -53,3 +54,4 @@ If you love PyT, please star our project on GitHub to show your support! :star: [#118]: https://github.com/python-security/pyt/pull/118 [#111]: https://github.com/python-security/pyt/pull/111 [#110]: https://github.com/python-security/pyt/pull/110 +[@lfatty]: https://github.com/lfatty From 09c99be4a813fc11a49783d1a7e7cc99875f8271 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 17:35:22 -0700 Subject: [PATCH 014/291] Format parsing of command line args --- pyt/__main__.py | 308 ++++++++++++++++++++++++++++++------------------ 1 file changed, 191 insertions(+), 117 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 4ceb67d1..9d2b7e21 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -60,135 +60,209 @@ def parse_args(args): subparsers = parser.add_subparsers() entry_group = parser.add_mutually_exclusive_group(required=True) - entry_group.add_argument('-f', '--filepath', - help='Path to the file that should be analysed.', - type=str) - entry_group.add_argument('-gr', '--git-repos', - help='Takes a CSV file of git_url, path per entry.', - type=str) - - parser.add_argument('-pr', '--project-root', - help='Add project root, this is important when the entry' + - ' file is not at the root of the project.', type=str) - parser.add_argument('-d', '--draw-cfg', - help='Draw CFG and output as .pdf file.', - action='http://github.com/store_true') - parser.add_argument('-o', '--output-filename', 
- help='Output filename.', type=str) - parser.add_argument('-csv', '--csv-path', type=str, - help='Give the path of the csv file' - ' repos should be added to.') + entry_group.add_argument( + '-f', '--filepath', + help='Path to the file that should be analysed.', + type=str + ) + entry_group.add_argument( + '-gr', '--git-repos', + help='Takes a CSV file of git_url, path per entry.', + type=str + ) + + parser.add_argument( + '-pr', '--project-root', + help='Add project root, this is important when the entry ' + 'file is not at the root of the project.', + type=str + ) + parser.add_argument( + '-d', '--draw-cfg', + help='Draw CFG and output as .pdf file.', + action='http://github.com/store_true' + ) + parser.add_argument( + '-o', '--output-filename', + help='Output filename.', type=str + ) + parser.add_argument( + '-csv', '--csv-path', type=str, + help='Give the path of the csv file' + ' repos should be added to.' + ) + parser.add_argument( + '-t', '--trigger-word-file', + help='Input trigger word file.', + type=str, + default=default_trigger_word_file + ) + parser.add_argument( + '-m', '--blackbox-mapping-file', + help='Input blackbox mapping file.', + type=str, + default=default_blackbox_mapping_file + ) + parser.add_argument( + '-py2', '--python-2', + help='[WARNING, EXPERIMENTAL] Turns on Python 2 mode,' + + ' needed when target file(s) are written in Python 2.', + action='http://github.com/store_true' + ) + parser.add_argument( + '-l', '--log-level', + help='Choose logging level: CRITICAL, ERROR,' + ' WARNING(Default), INFO, DEBUG, NOTSET.', + type=str + ) + parser.add_argument( + '-a', '--adaptor', + help='Choose an adaptor: Flask(Default), Django, Every or Pylons', + type=str + ) + parser.add_argument( + '-db', '--create-database', + help='Creates a sql file that can be used to' + ' create a database.', + action='http://github.com/store_true' + ) + parser.add_argument( + '-dl', '--draw-lattice', + nargs='+', help='Draws a lattice.' 
+ ) + parser.add_argument( + '-j', '--json', + help='Prints JSON instead of report.', + action='http://github.com/store_true', + default=False + ) + parser.add_argument( + '-ppm', '--print-project-modules', + help='Print project modules.', action='http://github.com/store_true' + ) + parser.add_argument( + '-b', '--baseline', + help='path of a baseline report to compare against ' + '(only JSON-formatted files are accepted)', + type=str, + default=False + ) print_group = parser.add_mutually_exclusive_group() - print_group.add_argument('-p', '--print', - help='Prints the nodes of the CFG.', - action='http://github.com/store_true') - print_group.add_argument('-vp', '--verbose-print', - help='Verbose printing of -p.', action='http://github.com/store_true') - print_group.add_argument('-trim', '--trim-reassigned-in', - help='Trims the reassigned list to the vulnerability chain.', - action='http://github.com/store_true', - default=False) - print_group.add_argument('-i', '--interactive', - help='Will ask you about each vulnerability chain and blackbox nodes.', - action='http://github.com/store_true', - default=False) - - parser.add_argument('-t', '--trigger-word-file', - help='Input trigger word file.', - type=str, - default=default_trigger_word_file) - parser.add_argument('-m', '--blackbox-mapping-file', - help='Input blackbox mapping file.', - type=str, - default=default_blackbox_mapping_file) - parser.add_argument('-py2', '--python-2', - help='[WARNING, EXPERIMENTAL] Turns on Python 2 mode,' + - ' needed when target file(s) are written in Python 2.', action='http://github.com/store_true') - parser.add_argument('-l', '--log-level', - help='Choose logging level: CRITICAL, ERROR,' + - ' WARNING(Default), INFO, DEBUG, NOTSET.', type=str) - parser.add_argument('-a', '--adaptor', - help='Choose an adaptor: Flask(Default), Django, Every or Pylons', - type=str) - parser.add_argument('-db', '--create-database', - 
help='Creates a sql file that can be used to' + - ' create a database.', action='http://github.com/store_true') - parser.add_argument('-dl', '--draw-lattice', - nargs='+', help='Draws a lattice.') - parser.add_argument('-j', '--json', - help='Prints JSON instead of report.', - action='http://github.com/store_true', - default=False) + print_group.add_argument( + '-p', '--print', + help='Prints the nodes of the CFG.', + action='http://github.com/store_true' + ) + print_group.add_argument( + '-vp', '--verbose-print', + help='Verbose printing of -p.', + action='http://github.com/store_true' + ) + print_group.add_argument( + '-trim', '--trim-reassigned-in', + help='Trims the reassigned list to the vulnerability chain.', + action='http://github.com/store_true', + default=False + ) + print_group.add_argument( + '-i', '--interactive', + help='Will ask you about each vulnerability chain and blackbox nodes.', + action='http://github.com/store_true', + default=False + ) analysis_group = parser.add_mutually_exclusive_group() - analysis_group.add_argument('-li', '--liveness', - help='Run liveness analysis. Default is' + - ' reaching definitions tainted version.', - action='http://github.com/store_true') - analysis_group.add_argument('-re', '--reaching', - help='Run reaching definitions analysis.' 
+ - ' Default is reaching definitions' + - ' tainted version.', action='http://github.com/store_true') - analysis_group.add_argument('-rt', '--reaching-taint', - help='This is the default analysis:' + - ' reaching definitions tainted version.', - action='http://github.com/store_true') - - parser.add_argument('-ppm', '--print-project-modules', - help='Print project modules.', action='http://github.com/store_true') - parser.add_argument('-b', '--baseline', - help='path of a baseline report to compare against ' - '(only JSON-formatted files are accepted)', - type=str, - default=False) - - save_parser = subparsers.add_parser('save', help='Save menu.') - save_parser.set_defaults(which='save') - save_parser.add_argument('-fp', '--filename-prefix', - help='Filename prefix fx file_lattice.pyt', - type=str) - save_parser.add_argument('-du', '--def-use-chain', - help='Output the def-use chain(s) to file.', - action='http://github.com/store_true') - save_parser.add_argument('-ud', '--use-def-chain', - help='Output the use-def chain(s) to file', - action='http://github.com/store_true') - save_parser.add_argument('-cfg', '--control-flow-graph', - help='Output the CFGs to file.', - action='http://github.com/store_true') - save_parser.add_argument('-vcfg', '--verbose-control-flow-graph', - help='Output the verbose CFGs to file.', - action='http://github.com/store_true') - save_parser.add_argument('-an', '--analysis', - help='Output analysis results to file' + - ' in form of a constraint table.', - action='http://github.com/store_true') - save_parser.add_argument('-la', '--lattice', help='Output lattice(s) to file.', - action='http://github.com/store_true') - save_parser.add_argument('-vu', '--vulnerabilities', - help='Output vulnerabilities to file.', - action='http://github.com/store_true') - save_parser.add_argument('-all', '--save-all', - help='Output everything to file.', - 
action='http://github.com/store_true') + analysis_group.add_argument( + '-li', '--liveness', + help='Run liveness analysis. Default is' + ' reaching definitions tainted version.', + action='http://github.com/store_true' + ) + analysis_group.add_argument( + '-re', '--reaching', + help='Run reaching definitions analysis.' + ' Default is reaching definitions' + ' tainted version.', + action='http://github.com/store_true' + ) + analysis_group.add_argument( + '-rt', '--reaching-taint', + help='This is the default analysis:' + ' reaching definitions tainted version.', + action='http://github.com/store_true' + ) + save_parser = subparsers.add_parser( + 'save', + help='Save menu.' + ) + save_parser.set_defaults(which='save') + save_parser.add_argument( + '-fp', '--filename-prefix', + help='Filename prefix fx file_lattice.pyt', + type=str + ) + save_parser.add_argument( + '-du', '--def-use-chain', + help='Output the def-use chain(s) to file.', + action='http://github.com/store_true' + ) + save_parser.add_argument( + '-ud', '--use-def-chain', + help='Output the use-def chain(s) to file', + action='http://github.com/store_true' + ) + save_parser.add_argument( + '-cfg', '--control-flow-graph', + help='Output the CFGs to file.', + action='http://github.com/store_true' + ) + save_parser.add_argument( + '-vcfg', '--verbose-control-flow-graph', + help='Output the verbose CFGs to file.', + action='http://github.com/store_true' + ) + save_parser.add_argument( + '-an', '--analysis', + help='Output analysis results to file ' + 'in form of a constraint table.', + action='http://github.com/store_true' + ) + save_parser.add_argument( + '-la', '--lattice', help='Output lattice(s) to file.', + action='http://github.com/store_true' + ) + save_parser.add_argument( + '-vu', '--vulnerabilities', + help='Output vulnerabilities to file.', + action='http://github.com/store_true' + ) + 
save_parser.add_argument( + '-all', '--save-all', + help='Output everything to file.', + action='http://github.com/store_true' + ) search_parser = subparsers.add_parser( 'github_search', - help='Searches through github and runs PyT' - ' on found repositories. This can take some time.') + help='Searches through github and runs PyT ' + 'on found repositories. This can take some time.' + ) search_parser.set_defaults(which='search') - search_parser.add_argument( '-ss', '--search-string', required=True, - help='String for searching for repos on github.', type=str) + help='String for searching for repos on github.', + type=str + ) + search_parser.add_argument( + '-sd', '--start-date', + help='Start date for repo search. ' + 'Criteria used is Created Date.', + type=valid_date, + default=date(2010, 1, 1) + ) - search_parser.add_argument('-sd', '--start-date', - help='Start date for repo search. ' - 'Criteria used is Created Date.', - type=valid_date, - default=date(2010, 1, 1)) return parser.parse_args(args) From aae962534e941fa8f58c3e40ae4b51b520707f42 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 17:38:09 -0700 Subject: [PATCH 015/291] Fix https://stackoverflow.com/questions/43393764/python-3-6-project-structure-leads-to-runtimewarning --- pyt/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyt/__init__.py b/pyt/__init__.py index aa35dd91..e69de29b 100644 --- a/pyt/__init__.py +++ b/pyt/__init__.py @@ -1,5 +0,0 @@ -from .__main__ import main - - -if __name__ == '__main__': - main() From ed59b6d9fa276d2b5a34140f6851ef603cbc6d28 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 17:43:13 -0700 Subject: [PATCH 016/291] [tests] Make test_no_args pass after re-formatting the arg parsing --- tests/command_line_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/command_line_test.py b/tests/command_line_test.py index a4663225..9ee97067 100644 --- a/tests/command_line_test.py +++ 
b/tests/command_line_test.py @@ -24,10 +24,10 @@ def test_no_args(self): EXPECTED = """usage: python -m pyt [-h] (-f FILEPATH | -gr GIT_REPOS) [-pr PROJECT_ROOT] [-d] [-o OUTPUT_FILENAME] [-csv CSV_PATH] - [-p | -vp | -trim | -i] [-t TRIGGER_WORD_FILE] - [-m BLACKBOX_MAPPING_FILE] [-py2] [-l LOG_LEVEL] - [-a ADAPTOR] [-db] [-dl DRAW_LATTICE [DRAW_LATTICE ...]] - [-j] [-li | -re | -rt] [-ppm] [-b BASELINE] + [-t TRIGGER_WORD_FILE] [-m BLACKBOX_MAPPING_FILE] [-py2] + [-l LOG_LEVEL] [-a ADAPTOR] [-db] + [-dl DRAW_LATTICE [DRAW_LATTICE ...]] [-j] [-ppm] + [-b BASELINE] [-p | -vp | -trim | -i] [-li | -re | -rt] {save,github_search} ...\n""" + \ "python -m pyt: error: one of the arguments " + \ "-f/--filepath -gr/--git-repos is required\n" From 72285cd79eda4b56ca5ee4b012e72ef10c0491b5 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 21:04:24 -0700 Subject: [PATCH 017/291] Re-organize a lot of stuff --- .coveragerc | 6 - pyt/__main__.py | 260 +++------------------ pyt/analysis_base.py | 36 --- pyt/constraint_table.py | 6 - pyt/definition_chains.py | 5 +- pyt/draw.py | 230 ------------------ pyt/fixed_point.py | 4 +- pyt/framework_adaptor.py | 8 +- pyt/github_search.py | 255 ++++++++++---------- pyt/lattice.py | 26 +-- pyt/liveness.py | 134 ----------- pyt/reaching_definitions.py | 3 + pyt/reaching_definitions_base.py | 24 +- pyt/reaching_definitions_taint.py | 4 +- pyt/repo_runner.py | 64 ++--- pyt/save.py | 162 ------------- pyt/vulnerabilities.py | 4 +- tests/analysis_base_test_case.py | 21 +- tests/command_line_test.py | 12 +- tests/github_search_test.py | 18 +- tests/lattice_test.py | 143 ------------ tests/liveness_test.py | 33 --- tests/vulnerabilities_across_files_test.py | 15 +- tests/vulnerabilities_test.py | 18 +- tox.ini | 2 +- 25 files changed, 291 insertions(+), 1202 deletions(-) delete mode 100644 pyt/analysis_base.py delete mode 100644 pyt/draw.py delete mode 100644 pyt/liveness.py delete mode 100644 pyt/save.py delete mode 100644 
tests/lattice_test.py diff --git a/.coveragerc b/.coveragerc index 3b2dc417..37429140 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,8 +1,5 @@ [report] exclude_lines = - def print_lattice - def print_report - def print_table def valid_date def __repr__ def __str__ @@ -16,10 +13,7 @@ source = ./pyt omit = pyt/__main__.py pyt/definition_chains.py - pyt/draw.py pyt/formatters/json.py pyt/formatters/text.py pyt/github_search.py - pyt/liveness.py pyt/repo_runner.py - pyt/save.py diff --git a/pyt/__main__.py b/pyt/__main__.py index 9d2b7e21..0d23bebc 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -1,10 +1,9 @@ -"""This module is the comand line tool of pyt.""" +"""The comand line module of PyT.""" import argparse import os import sys from datetime import date -from pprint import pprint from .argument_helpers import ( default_blackbox_mapping_file, @@ -15,11 +14,7 @@ ) from .ast_helper import generate_ast from .baseline import get_vulnerabilities_not_in_baseline -from .constraint_table import ( - initialize_constraint_table, - print_table -) -from .draw import draw_cfgs, draw_lattices +from .constraint_table import initialize_constraint_table from .expr_visitor import make_cfg from .fixed_point import analyse from .formatters import ( @@ -33,23 +28,17 @@ is_function, is_function_without_leading_ ) -from .github_search import scan_github, set_github_api_token -from .lattice import print_lattice -from .liveness import LivenessAnalysis -from .project_handler import get_directory_modules, get_modules -from .reaching_definitions import ReachingDefinitionsAnalysis +from .github_search import ( + analyse_repo, + scan_github, + set_github_api_token +) +from .project_handler import ( + get_directory_modules, + get_modules +) from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from .repo_runner import get_repos -from .save import ( - cfg_to_file, - create_database, - def_use_chain_to_file, - lattice_to_file, - Output, - use_def_chain_to_file, - 
verbose_cfg_to_file, - vulnerabilities_to_file -) from .vulnerabilities import find_vulnerabilities @@ -62,7 +51,7 @@ def parse_args(args): entry_group = parser.add_mutually_exclusive_group(required=True) entry_group.add_argument( '-f', '--filepath', - help='Path to the file that should be analysed.', + help='Path to the file that should be analysed.', type=str ) entry_group.add_argument( @@ -77,15 +66,6 @@ def parse_args(args): 'file is not at the root of the project.', type=str ) - parser.add_argument( - '-d', '--draw-cfg', - help='Draw CFG and output as .pdf file.', - action='http://github.com/store_true' - ) - parser.add_argument( - '-o', '--output-filename', - help='Output filename.', type=str - ) parser.add_argument( '-csv', '--csv-path', type=str, help='Give the path of the csv file' @@ -120,26 +100,12 @@ def parse_args(args): help='Choose an adaptor: Flask(Default), Django, Every or Pylons', type=str ) - parser.add_argument( - '-db', '--create-database', - help='Creates a sql file that can be used to' - ' create a database.', - action='http://github.com/store_true' - ) - parser.add_argument( - '-dl', '--draw-lattice', - nargs='+', help='Draws a lattice.' 
- ) parser.add_argument( '-j', '--json', help='Prints JSON instead of report.', action='http://github.com/store_true', default=False ) - parser.add_argument( - '-ppm', '--print-project-modules', - help='Print project modules.', action='http://github.com/store_true' - ) parser.add_argument( '-b', '--baseline', help='path of a baseline report to compare against ' @@ -149,16 +115,6 @@ def parse_args(args): ) print_group = parser.add_mutually_exclusive_group() - print_group.add_argument( - '-p', '--print', - help='Prints the nodes of the CFG.', - action='http://github.com/store_true' - ) - print_group.add_argument( - '-vp', '--verbose-print', - help='Verbose printing of -p.', - action='http://github.com/store_true' - ) print_group.add_argument( '-trim', '--trim-reassigned-in', help='Trims the reassigned list to the vulnerability chain.', @@ -172,78 +128,6 @@ def parse_args(args): default=False ) - analysis_group = parser.add_mutually_exclusive_group() - analysis_group.add_argument( - '-li', '--liveness', - help='Run liveness analysis. Default is' - ' reaching definitions tainted version.', - action='http://github.com/store_true' - ) - analysis_group.add_argument( - '-re', '--reaching', - help='Run reaching definitions analysis.' - ' Default is reaching definitions' - ' tainted version.', - action='http://github.com/store_true' - ) - analysis_group.add_argument( - '-rt', '--reaching-taint', - help='This is the default analysis:' - ' reaching definitions tainted version.', - action='http://github.com/store_true' - ) - - save_parser = subparsers.add_parser( - 'save', - help='Save menu.' 
- ) - save_parser.set_defaults(which='save') - save_parser.add_argument( - '-fp', '--filename-prefix', - help='Filename prefix fx file_lattice.pyt', - type=str - ) - save_parser.add_argument( - '-du', '--def-use-chain', - help='Output the def-use chain(s) to file.', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-ud', '--use-def-chain', - help='Output the use-def chain(s) to file', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-cfg', '--control-flow-graph', - help='Output the CFGs to file.', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-vcfg', '--verbose-control-flow-graph', - help='Output the verbose CFGs to file.', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-an', '--analysis', - help='Output analysis results to file ' - 'in form of a constraint table.', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-la', '--lattice', help='Output lattice(s) to file.', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-vu', '--vulnerabilities', - help='Output vulnerabilities to file.', - action='http://github.com/store_true' - ) - save_parser.add_argument( - '-all', '--save-all', - help='Output everything to file.', - action='http://github.com/store_true' - ) - search_parser = subparsers.add_parser( 'github_search', help='Searches through github and runs PyT ' @@ -266,43 +150,9 @@ def parse_args(args): return parser.parse_args(args) -def analyse_repo(args, github_repo, analysis_type, ui_mode): - cfg_list = list() - directory = os.path.dirname(github_repo.path) - project_modules = get_modules(directory) - local_modules = get_directory_modules(directory) - tree = generate_ast(github_repo.path) - cfg = make_cfg( - tree, - project_modules, - local_modules, - github_repo.path - ) - cfg_list.append(cfg) - - initialize_constraint_table(cfg_list) - 
analyse(cfg_list, analysis_type=analysis_type) - vulnerabilities = find_vulnerabilities( - cfg_list, - analysis_type, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ) - ) - return vulnerabilities - - def main(command_line_args=sys.argv[1:]): args = parse_args(command_line_args) - analysis = ReachingDefinitionsTaintAnalysis - if args.liveness: - analysis = LivenessAnalysis - elif args.reaching: - analysis = ReachingDefinitionsAnalysis - ui_mode = UImode.NORMAL if args.interactive: ui_mode = UImode.INTERACTIVE @@ -314,7 +164,11 @@ def main(command_line_args=sys.argv[1:]): repos = get_repos(args.git_repos) for repo in repos: repo.clone() - vulnerabilities = analyse_repo(args, repo, analysis, ui_mode) + vulnerabilities = analyse_repo( + args, + repo, + ui_mode + ) if args.json: json.report(vulnerabilities, sys.stdout) else: @@ -323,14 +177,11 @@ def main(command_line_args=sys.argv[1:]): repo.clean_up() exit() - if args.which == 'search': set_github_api_token() scan_github( args.search_string, args.start_date, - analysis, - analyse_repo, args.csv_path, ui_mode, args @@ -339,7 +190,6 @@ def main(command_line_args=sys.argv[1:]): path = os.path.normpath(args.filepath) - directory = None if args.project_root: directory = os.path.normpath(args.project_root) else: @@ -349,14 +199,13 @@ def main(command_line_args=sys.argv[1:]): tree = generate_ast(path, python_2=args.python_2) - cfg_list = list() cfg = make_cfg( tree, project_modules, local_modules, path ) - cfg_list.append(cfg) + cfg_list = list(cfg) framework_route_criteria = is_flask_route_function if args.adaptor: if args.adaptor.lower().startswith('e'): @@ -366,15 +215,20 @@ def main(command_line_args=sys.argv[1:]): elif args.adaptor.lower().startswith('d'): framework_route_criteria = is_django_view_function # Add all the route functions to the cfg_list - FrameworkAdaptor(cfg_list, project_modules, local_modules, framework_route_criteria) + FrameworkAdaptor( + cfg_list, + 
project_modules, + local_modules, + framework_route_criteria + ) initialize_constraint_table(cfg_list) - - analyse(cfg_list, analysis_type=analysis) - + analyse( + cfg_list, + ReachingDefinitionsTaintAnalysis + ) vulnerabilities = find_vulnerabilities( cfg_list, - analysis, ui_mode, VulnerabilityFiles( args.blackbox_mapping_file, @@ -382,64 +236,16 @@ def main(command_line_args=sys.argv[1:]): ) ) if args.baseline: - vulnerabilities = get_vulnerabilities_not_in_baseline(vulnerabilities, args.baseline) - + vulnerabilities = get_vulnerabilities_not_in_baseline( + vulnerabilities, + args.baseline + ) + if args.json: json.report(vulnerabilities, sys.stdout) else: text.report(vulnerabilities, sys.stdout) - if args.draw_cfg: - if args.output_filename: - draw_cfgs(cfg_list, args.output_filename) - else: - draw_cfgs(cfg_list) - if args.print: - lattice = print_lattice(cfg_list, analysis) - - print_table(lattice) - for i, e in enumerate(cfg_list): - print('############## CFG number: ', i) - print(e) - if args.verbose_print: - for i, e in enumerate(cfg_list): - print('############## CFG number: ', i) - print(repr(e)) - - if args.print_project_modules: - print('############## PROJECT MODULES ##############') - pprint(project_modules) - - if args.create_database: - create_database(cfg_list, vulnerabilities) - if args.draw_lattice: - draw_lattices(cfg_list) - - # Output to file - if args.which == 'save': - if args.filename_prefix: - Output.filename_prefix = args.filename_prefix - if args.save_all: - def_use_chain_to_file(cfg_list) - use_def_chain_to_file(cfg_list) - cfg_to_file(cfg_list) - verbose_cfg_to_file(cfg_list) - lattice_to_file(cfg_list, analysis) - vulnerabilities_to_file(vulnerabilities) - else: - if args.def_use_chain: - def_use_chain_to_file(cfg_list) - if args.use_def_chain: - use_def_chain_to_file(cfg_list) - if args.control_flow_graph: - cfg_to_file(cfg_list) - if args.verbose_control_flow_graph: - verbose_cfg_to_file(cfg_list) - if args.lattice: - 
lattice_to_file(cfg_list, analysis) - if args.vulnerabilities: - vulnerabilities_to_file(vulnerabilities) - if __name__ == '__main__': main() diff --git a/pyt/analysis_base.py b/pyt/analysis_base.py deleted file mode 100644 index 8a4bbcf6..00000000 --- a/pyt/analysis_base.py +++ /dev/null @@ -1,36 +0,0 @@ -"""This module contains a base class for the analysis component used in PyT.""" - -from abc import ( - ABCMeta, - abstractmethod -) - - -class AnalysisBase(metaclass=ABCMeta): - """Base class for fixed point analyses.""" - - annotated_cfg_nodes = dict() - - def __init__(self, cfg): - self.cfg = cfg - self.build_lattice(cfg) - - @staticmethod - @abstractmethod - def get_lattice_elements(cfg_nodes): - pass - - @abstractmethod - def equal(self, value, other): - """Define the equality for two constraint sets - that are defined by bitvectors.""" - pass - - @abstractmethod - def build_lattice(self, cfg): - pass - - @abstractmethod - def dep(self, q_1): - """Represents the dep mapping from Schwartzbach.""" - pass diff --git a/pyt/constraint_table.py b/pyt/constraint_table.py index de7a0cea..bc7466d7 100644 --- a/pyt/constraint_table.py +++ b/pyt/constraint_table.py @@ -17,9 +17,3 @@ def constraint_join(cfg_nodes): for e in cfg_nodes: r = r | constraint_table[e] return r - - -def print_table(lattice): - print('Constraint table:') - for k, v in constraint_table.items(): - print(str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)])) diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index 8b813e8b..ad63871b 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -3,7 +3,6 @@ from .constraint_table import constraint_table from .lattice import Lattice from .node_types import AssignmentNode -from .reaching_definitions import ReachingDefinitionsAnalysis from .vars_visitor import VarsVisitor @@ -38,7 +37,7 @@ def get_constraint_nodes(node, lattice): def build_use_def_chain(cfg_nodes): use_def = dict() - lattice = Lattice(cfg_nodes, 
ReachingDefinitionsAnalysis) + lattice = Lattice(cfg_nodes) for node in cfg_nodes: definitions = list() @@ -53,7 +52,7 @@ def build_use_def_chain(cfg_nodes): def build_def_use_chain(cfg_nodes): def_use = dict() - lattice = Lattice(cfg_nodes, ReachingDefinitionsAnalysis) + lattice = Lattice(cfg_nodes) # For every node for node in cfg_nodes: diff --git a/pyt/draw.py b/pyt/draw.py deleted file mode 100644 index 7dbf378a..00000000 --- a/pyt/draw.py +++ /dev/null @@ -1,230 +0,0 @@ -"""Draws CFG.""" - -import argparse -from itertools import permutations -from subprocess import call - -from graphviz import Digraph - -from .node_types import AssignmentNode - - -IGNORED_LABEL_NAME_CHARACHTERS = ':' - -cfg_styles = { - 'graph': { - 'fontsize': '16', - 'fontcolor': 'black', - 'bgcolor': 'transparent', - 'rankdir': 'TB', - 'splines': 'ortho', - 'margin': '0.01', - }, - 'nodes': { - 'fontname': 'Gotham', - 'shape': 'box', - 'fontcolor': 'black', - 'color': 'black', - 'style': 'filled', - 'fillcolor': 'transparent', - }, - 'edges': { - 'style': 'filled', - 'color': 'black', - 'arrowhead': 'normal', - 'fontname': 'Courier', - 'fontsize': '12', - 'fontcolor': 'black', - } -} - -lattice_styles = { - 'graph': { - 'fontsize': '16', - 'fontcolor': 'black', - 'bgcolor': 'transparent', - 'rankdir': 'TB', - 'splines': 'line', - 'margin': '0.01', - 'ranksep': '1', - }, - 'nodes': { - 'fontname': 'Gotham', - 'shape': 'none', - 'fontcolor': 'black', - 'color': 'black', - 'style': 'filled', - 'fillcolor': 'transparent', - }, - 'edges': { - 'style': 'filled', - 'color': 'black', - 'arrowhead': 'none', - 'fontname': 'Courier', - 'fontsize': '12', - 'fontcolor': 'black', - } -} - - -def apply_styles(graph, styles): - """Apply styles to graph.""" - graph.graph_attr.update( - ('graph' in styles and styles['graph']) or {} - ) - graph.node_attr.update( - ('nodes' in styles and styles['nodes']) or {} - ) - graph.edge_attr.update( - ('edges' in styles and styles['edges']) or {} - ) - return graph - - 
-def draw_cfg(cfg, output_filename='output'): - """Draw CFG and output as pdf.""" - graph = Digraph(format='pdf') - - for node in cfg.nodes: - stripped_label = node.label.replace(IGNORED_LABEL_NAME_CHARACHTERS, '') - - if 'Exit' in stripped_label: - graph.node(stripped_label, 'Exit', shape='none') - elif 'Entry' in stripped_label: - graph.node(stripped_label, 'Entry', shape='none') - else: - graph.node(stripped_label, stripped_label) - - for ingoing_node in node.ingoing: - graph.edge(ingoing_node.label.replace( - IGNORED_LABEL_NAME_CHARACHTERS, ''), stripped_label) - - graph = apply_styles(graph, cfg_styles) - graph.render(filename=output_filename) - - -class Node(): - def __init__(self, s, parent, children=None): - self.s = s - self.parent = parent - self.children = children - - def __str__(self): - return 'Node: ' + str(self.s) + ' Parent: ' + str(self.parent) + ' Children: ' + str(self.children) - - def __hash__(self): - return hash(str(self.s)) - - -def draw_node(l, graph, node): - node_label = str(node.s) - graph.node(node_label, node_label) - for child in node.children: - child_label = str(child.s) - graph.node(child_label, child_label) - if not (node_label, child_label) in l: - graph.edge(node_label, child_label, ) - l.append((node_label, child_label)) - draw_node(l, graph, child) - - -def make_lattice(s, length): - p = Node(s, None) - p.children = get_children(p, s, length) - return p - - -def get_children(p, s, length): - children = set() - if length < 0: - return children - for subset in permutations(s, length): - setsubset = set(subset) - append = True - for node in children: - if setsubset == node.s: - append = False - break - if append: - n = Node(setsubset, p) - n.children = get_children(n, setsubset, length-1) - children.add(n) - return children - - -def add_anchor(filename): - filename += '.dot' - out = list() - delimiter = '->' - with open(filename, 'r') as fd: - for line in fd: - if delimiter in line: - s = line.split(delimiter) - ss = s[0][:-1] - 
s[0] = ss + ':s ' - ss = s[1][:-1] - s[1] = ss + ':n\n' - s.insert(1, delimiter) - out.append(''.join(s)) - elif 'set()' in line: - out.append('"set()" [label="{}"]') - else: - out.append(line) - with open(filename, 'w') as fd: - for line in out: - fd.write(line) - - -def run_dot(filename): - filename += '.dot' - call(['dot', '-Tpdf', filename, '-o', filename.replace('.dot', '.pdf')]) - - -def draw_lattice(cfg, output_filename='output'): - """Draw CFG and output as pdf.""" - graph = Digraph(format='pdf') - - ll = [s.label for s in cfg.nodes if isinstance(s, AssignmentNode)] - root = make_lattice(ll, len(ll)-1) - l = list() - draw_node(l, graph, root) - - graph = apply_styles(graph, lattice_styles) - graph.render(filename=output_filename+'.dot') - - add_anchor(output_filename) - run_dot(output_filename) - - -def draw_lattice_from_labels(labels, output_filename): - graph = Digraph(format='pdf') - - root = make_lattice(labels, len(labels)-1) - l = list() - draw_node(l, graph, root) - - graph = apply_styles(graph, lattice_styles) - graph.render(filename=output_filename+'.dot') - - add_anchor(output_filename) - run_dot(output_filename) - - -def draw_lattices(cfg_list, output_prefix='output'): - for i, cfg in enumerate(cfg_list): - draw_lattice(cfg, output_prefix + '_' + str(i)) - - -def draw_cfgs(cfg_list, output_prefix='output'): - for i, cfg in enumerate(cfg_list): - draw_cfg(cfg, output_prefix + '_' + str(i)) - - -parser = argparse.ArgumentParser() -parser.add_argument('-l', '--labels', nargs='+', - help='Set of labels in lattice.') -parser.add_argument('-n', '--name', help='Specify filename.', type=str) -if __name__ == '__main__': - args = parser.parse_args() - - draw_lattice_from_labels(args.labels, args.name) diff --git a/pyt/fixed_point.py b/pyt/fixed_point.py index b0574934..de148518 100644 --- a/pyt/fixed_point.py +++ b/pyt/fixed_point.py @@ -9,7 +9,7 @@ def __init__(self, cfg, analysis): """Fixed point analysis. 
Analysis must be a dataflow analysis containing a 'fixpointmethod' - method that analyses one CFG node.""" + method that analyses one CFG.""" self.analysis = analysis(cfg) self.cfg = cfg @@ -22,7 +22,7 @@ def fixpoint_runner(self): self.analysis.fixpointmethod(q[0]) # y = F_i(x_1, ..., x_n); y = constraint_table[q[0]] # y = q[0].new_constraint - if not self.analysis.equal(y, x_i): + if y != x_i: for node in self.analysis.dep(q[0]): # for (v in dep(v_i)) q.append(node) # q.append(v): constraint_table[q[0]] = y # q[0].old_constraint = q[0].new_constraint # x_i = y diff --git a/pyt/framework_adaptor.py b/pyt/framework_adaptor.py index c7e5119d..c2a49ef9 100644 --- a/pyt/framework_adaptor.py +++ b/pyt/framework_adaptor.py @@ -16,7 +16,13 @@ class FrameworkAdaptor(): entry points in a framework and then taints their arguments. """ - def __init__(self, cfg_list, project_modules, local_modules, is_route_function): + def __init__( + self, + cfg_list, + project_modules, + local_modules, + is_route_function + ): self.cfg_list = cfg_list self.project_modules = project_modules self.local_modules = local_modules diff --git a/pyt/github_search.py b/pyt/github_search.py index df0cb40c..0dc1e953 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -1,3 +1,4 @@ +import os import re import requests import time @@ -5,9 +6,19 @@ from datetime import date, datetime, timedelta from . 
import repo_runner +from .argument_helpers import VulnerabilityFiles +from .ast_helper import generate_ast +from .constraint_table import initialize_constraint_table +from .expr_visitor import make_cfg +from .fixed_point import analyse +from .formatters import json +from .project_handler import ( + get_directory_modules, + get_modules +) from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from .repo_runner import add_repo_to_csv, NoEntryPathError -from .save import save_repo_scan +from .vulnerabilities import find_vulnerabilities DEFAULT_TIMEOUT_IN_SECONDS = 60 @@ -21,8 +32,10 @@ def set_github_api_token(): global GITHUB_OAUTH_TOKEN try: - GITHUB_OAUTH_TOKEN = open('github_access_token.pyt', - 'r').read().strip() + GITHUB_OAUTH_TOKEN = open( + 'github_access_token.pyt', + 'r' + ).read().strip() except FileNotFoundError: print('Insert your GitHub access token' ' in the github_access_token.pyt file in the pyt package' @@ -30,25 +43,26 @@ def set_github_api_token(): exit(0) -class Languages: - _prefix = 'language:' - python = _prefix + 'python' - javascript = _prefix + 'javascript' - # add others here - - class Query: - def __init__(self, base_url, search_string, - language=None, repo=None, time_interval=None, per_page=100): + def __init__( + self, + base_url, + search_string, + repo=None, + time_interval=None, + per_page=100 + ): repo = self._repo_parameter(repo) time_interval = self._time_interval_parameter(time_interval) search_string = self._search_parameter(search_string) per_page = self._per_page_parameter(per_page) - parameters = self._construct_parameters([search_string, - language, - repo, - time_interval, - per_page]) + parameters = self._construct_parameters([ + search_string, + 'language:python', + repo, + time_interval, + per_page + ]) self.query_string = self._construct_query(base_url, parameters) def _construct_query(self, base_url, parameters): @@ -106,10 +120,14 @@ def append(self, request_time): else: delta = request_time - 
self.counter[0] if delta.seconds < self.timeout_in_seconds: - print('Maximum requests "{}" reached' - ' timing out for {} seconds.' - .format(len(self.counter), - self.timeout_in_seconds - delta.seconds)) + print( + 'Maximum requests "{}" reached' + ' timing out for {} seconds.' + .format( + len(self.counter), + self.timeout_in_seconds - delta.seconds + ) + ) self.timeout(self.timeout_in_seconds - delta.seconds) self.counter.pop(0) # pop index 0 self.counter.append(datetime.now()) @@ -138,22 +156,22 @@ def _request(self, query_string): headers = {'Authorization': 'token ' + GITHUB_OAUTH_TOKEN} r = requests.get(query_string, headers=headers) - json = r.json() + response_body = r.json() if r.status_code != 200: print('Bad request:') print(r.status_code) - print(json) + print(response_body) Search.request_counter.timeout() self._request(query_string) return - self.total_count = json['total_count'] + self.total_count = response_body['total_count'] print('Number of results: {}.'.format(self.total_count)) - self.incomplete_results = json['incomplete_results'] + self.incomplete_results = response_body['incomplete_results'] if self.incomplete_results: raise IncompleteResultsError() - self.parse_results(json['items']) + self.parse_results(response_body['items']) @abstractmethod def parse_results(self, json_results): @@ -173,106 +191,103 @@ def parse_results(self, json_results): class File: - def __init__(self, json): - self.name = json['name'] - self.repo = Repo(json['repository']) + def __init__(self, item): + self.name = item['name'] + self.repo = Repo(item['repository']) class Repo: - def __init__(self, json): - self.url = json['html_url'] - self.name = json['full_name'] + def __init__(self, item): + self.url = item['html_url'] + self.name = item['full_name'] -def get_dates(start_date, end_date=date.today(), interval=7): +def get_dates( + start_date, + end_date=date.today() +): + interval = 7 delta = end_date - start_date - for i in range(delta.days // interval): - yield 
(start_date + timedelta(days=(i * interval) - interval), - start_date + timedelta(days=i * interval)) - else: - # Take care of the remainder of days - yield (start_date + timedelta(days=i * interval), - start_date + timedelta(days=i * interval + - interval + - delta.days % interval)) - - -def scan_github(search_string, start_date, analysis_type, analyse_repo_func, csv_path, ui_mode, other_args): - analyse_repo = analyse_repo_func - for d in get_dates(start_date, interval=7): - q = Query(SEARCH_REPO_URL, search_string, - language=Languages.python, - time_interval=str(d[0]) + ' .. ' + str(d[1]), - per_page=100) - s = SearchRepo(q) - for repo in s.results: - q = Query(SEARCH_CODE_URL, 'app = Flask(__name__)', - Languages.python, repo) - s = SearchCode(q) - if s.results: - r = repo_runner.Repo(repo.url) + for i in range((delta.days // interval) + 1): + yield ( + start_date + timedelta(days=i * interval), + start_date + timedelta(days=i * interval + interval) + ) + + +def analyse_repo( + args, + github_repo, + ui_mode +): + cfg_list = list() + directory = os.path.dirname(github_repo.path) + project_modules = get_modules(directory) + local_modules = get_directory_modules(directory) + tree = generate_ast(github_repo.path) + cfg = make_cfg( + tree, + project_modules, + local_modules, + github_repo.path + ) + cfg_list.append(cfg) + + initialize_constraint_table(cfg_list) + analyse( + cfg_list, + ReachingDefinitionsTaintAnalysis + ) + vulnerabilities = find_vulnerabilities( + cfg_list, + ui_mode, + VulnerabilityFiles( + args.blackbox_mapping_file, + args.trigger_word_file + ) + ) + return vulnerabilities + + +def scan_github( + search_string, + start_date, + csv_path, + ui_mode, + other_args +): + for range_start, range_end in get_dates(start_date): + query = Query( + SEARCH_REPO_URL, + search_string, + time_interval='{} .. 
{}'.format( + range_start, + range_end + ), + per_page=100 + ) + search_repos = SearchRepo(query) + for repo in search_repos.results: + query = Query( + SEARCH_CODE_URL, + 'app = Flask(__name__)', + repo + ) + search_code = SearchCode(query) + if search_code.results: + repo = repo_runner.Repo(repo.url) try: - r.clone() + repo.clone() except NoEntryPathError as err: - save_repo_scan(repo, r.path, vulnerabilities=None, error=err) + print('NoEntryPathError for {}'.format(repo.url)) continue - except: - save_repo_scan(repo, r.path, vulnerabilities=None, error='Other Error Unknown while cloning :-(') - continue - try: - vulnerabilities = analyse_repo(other_args, r, analysis_type, ui_mode) - if vulnerabilities: - save_repo_scan(repo, r.path, vulnerabilities) - add_repo_to_csv(csv_path, r) - else: - save_repo_scan(repo, r.path, vulnerabilities=None) - r.clean_up() - except SyntaxError as err: - save_repo_scan(repo, r.path, vulnerabilities=None, error=err) - except IOError as err: - save_repo_scan(repo, r.path, vulnerabilities=None, error=err) - except AttributeError as err: - save_repo_scan(repo, r.path, vulnerabilities=None, error=err) - except: - save_repo_scan(repo, r.path, vulnerabilities=None, error='Other Error Unknown :-(') - -if __name__ == '__main__': - for x in get_dates(date(2010, 1, 1), interval=93): - print(x) - exit() - scan_github('flask', ReachingDefinitionsTaintAnalysis) - exit() - q = Query(SEARCH_REPO_URL, 'flask') - s = SearchRepo(q) - for repo in s.results[:3]: - q = Query(SEARCH_CODE_URL, 'app = Flask(__name__)', Languages.python, repo) - s = SearchCode(q) - r = repo_runner.Repo(repo.url) - r.clone() - print(r.path) - r.clean_up() - print(repo.name) - print(len(s.results)) - print([f.name for f in s.results]) - exit() - - r = RequestCounter('test', timeout=2) - for x in range(15): - r.append(datetime.now()) - exit() - - dates = get_dates(date(2010, 1, 1)) - for date in dates: - q = Query(SEARCH_REPO_URL, 'flask', - time_interval=str(date) + ' .. 
' + str(date)) - print(q.query_string) - exit() - s = SearchRepo(q) - print(s.total_count) - print(s.incomplete_results) - print([r.URL for r in s.results]) - q = Query(SEARCH_CODE_URL, 'import flask', Languages.python, s.results[0]) - s = SearchCode(q) - #print(s.total_count) - #print(s.incomplete_results) - #print([f.name for f in s.results]) + vulnerabilities = analyse_repo( + other_args, + repo, + ui_mode + ) + with open(repo.path + '.pyt', 'a') as fd: + json.report(vulnerabilities, fd) + if vulnerabilities: + add_repo_to_csv(csv_path, repo) + repo.clean_up() diff --git a/pyt/lattice.py b/pyt/lattice.py index 125a8c2a..f4dd531f 100644 --- a/pyt/lattice.py +++ b/pyt/lattice.py @@ -1,11 +1,21 @@ from .constraint_table import constraint_table +from .node_types import AssignmentNode + + +def get_lattice_elements(cfg_nodes): + """Returns all assignment nodes as they are the only lattice elements + in the reaching definitions analysis. + """ + for node in cfg_nodes: + if isinstance(node, AssignmentNode): + yield node class Lattice: - def __init__(self, cfg_nodes, analysis_type): + def __init__(self, cfg_nodes): self.el2bv = dict() # Element to bitvector dictionary self.bv2el = list() # Bitvector to element list - for i, e in enumerate(analysis_type.get_lattice_elements(cfg_nodes)): + for i, e in enumerate(get_lattice_elements(cfg_nodes)): # Give each element a unique shift of 1 self.el2bv[e] = 0b1 << i self.bv2el.insert(0, e) @@ -37,15 +47,3 @@ def in_constraint(self, node1, node2): return False return constraint & value != 0 - - -def print_lattice(cfg_list, analysis_type): - nodes = list() - for cfg in cfg_list: - nodes.extend(cfg.nodes) - l = Lattice(nodes, analysis_type) - - print('Lattice:') - for k, v in l.el2bv.items(): - print(str(k) + ': ' + str(v)) - return l diff --git a/pyt/liveness.py b/pyt/liveness.py deleted file mode 100644 index 38935b94..00000000 --- a/pyt/liveness.py +++ /dev/null @@ -1,134 +0,0 @@ -import ast - -from .analysis_base import 
AnalysisBase -from .ast_helper import get_call_names_as_string -from .constraint_table import ( - constraint_join, - constraint_table -) -from .lattice import Lattice -from .node_types import ( - AssignmentNode, - BBorBInode, - EntryOrExitNode -) -from .vars_visitor import VarsVisitor - - -class LivenessAnalysis(AnalysisBase): - """Reaching definitions analysis rules implemented.""" - - def __init__(self, cfg): - super().__init__(cfg) - - def join(self, cfg_node): - """Joins all constraints of the ingoing nodes and returns them. - This represents the JOIN auxiliary definition from Schwartzbach.""" - return constraint_join(cfg_node.outgoing) - - def is_output(self, cfg_node): - if isinstance(cfg_node.ast_node, ast.Call): - call_name = get_call_names_as_string(cfg_node.ast_node.func) - if 'print' in call_name: - return True - return False - - def is_condition(self, cfg_node): - if isinstance(cfg_node.ast_node, (ast.If, ast.While)): - return True - elif self.is_output(cfg_node): - return True - return False - - def remove_id_assignment(self, JOIN, cfg_node): - lvars = list() - - if isinstance(cfg_node, BBorBInode): - lvars.append(cfg_node.left_hand_side) - else: - try: - for expr in cfg_node.ast_node.targets: - vv = VarsVisitor() - vv.visit(expr) - lvars.extend(vv.result) - except AttributeError: # If it is AugAssign - vv = VarsVisitor() - vv.visit(cfg_node.ast_node.target) - lvars.extend(vv.result) - for var in lvars: - if var in self.lattice.get_elements(JOIN): - # Remove var from JOIN - JOIN = JOIN ^ self.lattice.el2bv[var] - return JOIN - - def add_vars_assignment(self, JOIN, cfg_node): - rvars = list() - if isinstance(cfg_node, BBorBInode): - # A conscience decision was made not to include e.g. 
~call_N's in RHS vars - rvars.extend(cfg_node.right_hand_side_variables) - else: - vv = VarsVisitor() - vv.visit(cfg_node.ast_node.value) - rvars.extend(vv.result) - for var in rvars: - # Add var to JOIN - JOIN = JOIN | self.lattice.el2bv[var] - return JOIN - - def add_vars_conditional(self, JOIN, cfg_node): - varse = None - if isinstance(cfg_node.ast_node, ast.While): - vv = VarsVisitor() - vv.visit(cfg_node.ast_node.test) - varse = vv.result - elif self.is_output(cfg_node): - vv = VarsVisitor() - vv.visit(cfg_node.ast_node) - varse = vv.result - elif isinstance(cfg_node.ast_node, ast.If): - vv = VarsVisitor() - vv.visit(cfg_node.ast_node.test) - varse = vv.result - - for var in varse: - JOIN = JOIN | self.lattice.el2bv[var] - - return JOIN - - def fixpointmethod(self, cfg_node): - if isinstance(cfg_node, EntryOrExitNode) and 'Exit' in cfg_node.label: - constraint_table[cfg_node] = 0 - elif isinstance(cfg_node, AssignmentNode): - JOIN = self.join(cfg_node) - JOIN = self.remove_id_assignment(JOIN, cfg_node) - JOIN = self.add_vars_assignment(JOIN, cfg_node) - constraint_table[cfg_node] = JOIN - elif self.is_condition(cfg_node): - JOIN = self.join(cfg_node) - JOIN = self.add_vars_conditional(JOIN, cfg_node) - constraint_table[cfg_node] = JOIN - else: - constraint_table[cfg_node] = self.join(cfg_node) - - def dep(self, q_1): - """Represents the dep mapping from Schwartzbach.""" - for node in q_1.outgoing: - yield node - - def get_lattice_elements(cfg_nodes): - """Returns all variables as they are the only lattice elements - in the liveness analysis. 
- This is a static method which is overwritten from the base class.""" - lattice_elements = set() # set() to avoid duplicates - for node in (node for node in cfg_nodes if node.ast_node): - vv = VarsVisitor() - vv.visit(node.ast_node) - for var in vv.result: - lattice_elements.add(var) - return lattice_elements - - def equal(self, value, other): - return value == other - - def build_lattice(self, cfg): - self.lattice = Lattice(cfg.nodes, LivenessAnalysis) diff --git a/pyt/reaching_definitions.py b/pyt/reaching_definitions.py index 3bf5d075..299339d2 100644 --- a/pyt/reaching_definitions.py +++ b/pyt/reaching_definitions.py @@ -7,6 +7,9 @@ class ReachingDefinitionsAnalysis(ReachingDefinitionsAnalysisBase): """Reaching definitions analysis rules implemented.""" def fixpointmethod(self, cfg_node): + """Regular reaching definitions. + Used in definition_chains.py + """ JOIN = self.join(cfg_node) # Assignment check if isinstance(cfg_node, AssignmentNode): diff --git a/pyt/reaching_definitions_base.py b/pyt/reaching_definitions_base.py index 5fecaa6a..f2dec1dd 100644 --- a/pyt/reaching_definitions_base.py +++ b/pyt/reaching_definitions_base.py @@ -1,14 +1,13 @@ -from .analysis_base import AnalysisBase from .constraint_table import constraint_join from .lattice import Lattice -from .node_types import AssignmentNode -class ReachingDefinitionsAnalysisBase(AnalysisBase): +class ReachingDefinitionsAnalysisBase(): """Reaching definitions analysis rules implemented.""" def __init__(self, cfg): - super().__init__(cfg) + self.cfg = cfg + self.lattice = Lattice(cfg.nodes) def join(self, cfg_node): """Joins all constraints of the ingoing nodes and returns them. 
@@ -24,24 +23,7 @@ def arrow(self, JOIN, _id): r = r ^ self.lattice.el2bv[node] return r - def fixpointmethod(self, cfg_node): - raise NotImplementedError() - def dep(self, q_1): """Represents the dep mapping from Schwartzbach.""" for node in q_1.outgoing: yield node - - def get_lattice_elements(cfg_nodes): - """Returns all assignment nodes as they are the only lattice elements - in the reaching definitions analysis. - This is a static method which is overwritten from the base class.""" - for node in cfg_nodes: - if isinstance(node, AssignmentNode): - yield node - - def equal(self, value, other): - return value == other - - def build_lattice(self, cfg): - self.lattice = Lattice(cfg.nodes, ReachingDefinitionsAnalysisBase) diff --git a/pyt/reaching_definitions_taint.py b/pyt/reaching_definitions_taint.py index 3ca72acc..b2c2e1d7 100644 --- a/pyt/reaching_definitions_taint.py +++ b/pyt/reaching_definitions_taint.py @@ -4,9 +4,11 @@ class ReachingDefinitionsTaintAnalysis(ReachingDefinitionsAnalysisBase): - """Reaching definitions analysis rules implemented.""" def fixpointmethod(self, cfg_node): + """The most important part of PyT, where we perform + the variant of reaching definitions to find where sources reach. 
+ """ JOIN = self.join(cfg_node) # Assignment check if isinstance(cfg_node, AssignmentNode): diff --git a/pyt/repo_runner.py b/pyt/repo_runner.py index cfb63290..7d6acba9 100644 --- a/pyt/repo_runner.py +++ b/pyt/repo_runner.py @@ -1,4 +1,4 @@ -"""This modules runs PyT on a CSV file of git repos.""" +"""Runs PyT on a CSV file of git repos.""" import os import shutil @@ -14,24 +14,25 @@ class NoEntryPathError(Exception): class Repo: """Holder for a repo with git URL and - a path to where the analysis should start""" + a path to where the analysis should start.""" - def __init__(self, URL, path=None): + def __init__( + self, + URL, + path=None + ): self.URL = URL.strip() - if path: - self.path = path.strip() - else: - self.path = None self.directory = None + self.path = path.strip() if path else None def clone(self): - """Clone repo and update path to match the current one""" + """Clone repo and update path to match the current one.""" - r = self.URL.split('/')[-1].split('.') - if len(r) > 1: - self.directory = '.'.join(r[:-1]) + repo = self.URL.split('/')[-1].split('.') + if len(repo) > 1: + self.directory = '.'.join(repo[:-1]) else: - self.directory = r[0] + self.directory = repo[0] if self.directory not in os.listdir(): git.Git().clone(self.URL) @@ -40,9 +41,7 @@ def clone(self): self._find_entry_path() elif self.path[0] == '/': self.path = self.path[1:] - self.path = os.path.join(self.directory, self.path) - else: - self.path = os.path.join(self.directory, self.path) + self.path = os.path.join(self.directory, self.path) def _find_entry_path(self): for root, dirs, files in os.walk(self.directory): @@ -52,8 +51,10 @@ def _find_entry_path(self): if 'app = Flask(__name__)' in fd.read(): self.path = os.path.join(root, f) return - raise NoEntryPathError('No entry path found in repo {}.' - .format(self.URL)) + raise NoEntryPathError( + 'No entry path found in repo {}.' 
+ .format(self.URL) + ) def clean_up(self): """Deletes the repo""" @@ -70,21 +71,22 @@ def get_repos(csv_path): return repos -def add_repo_to_file(path, repo): +def add_repo_to_csv( + repo, + csv_path=DEFAULT_CSV_PATH +): try: - with open(path, 'a') as fd: - fd.write('{}{}, {}' - .format(os.linesep, repo.URL, repo.path)) + with open(csv_path, 'a') as fd: + fd.write( + '{}{}, {}'.format( + os.linesep, + repo.URL, + repo.path + ) + ) except FileNotFoundError: - print('-csv handle not used and fallback path not found: {}' + print('-csv file not used and fallback path not found: {}' .format(DEFAULT_CSV_PATH)) - print('You need to specify the csv_path' - ' by using the "-csv" handle.') + print('To specify the csv_path ' + 'use the "-csv" option.') exit(1) - - -def add_repo_to_csv(csv_path, repo): - if csv_path is None: - add_repo_to_file(DEFAULT_CSV_PATH, repo) - else: - add_repo_to_file(csv_path, repo) diff --git a/pyt/save.py b/pyt/save.py deleted file mode 100644 index 5f0e3759..00000000 --- a/pyt/save.py +++ /dev/null @@ -1,162 +0,0 @@ -import os -from datetime import datetime - -from .definition_chains import ( - build_def_use_chain, - build_use_def_chain -) -from .formatters import text -from .lattice import Lattice -from .node_types import Node - - -database_file_name = 'db.sql' -nodes_table_name = 'nodes' -vulnerabilities_table_name = 'vulnerabilities' - -def create_nodes_table(): - with open(database_file_name, 'a') as fd: - fd.write('DROP TABLE IF EXISTS ' + nodes_table_name + '\n') - fd.write('CREATE TABLE ' + nodes_table_name + '(id int,label varchar(255),line_number int, path varchar(255));') - -def create_vulnerabilities_table(): - with open(database_file_name, 'a') as fd: - fd.write('DROP TABLE IF EXISTS ' + vulnerabilities_table_name + '\n') - fd.write('CREATE TABLE ' + vulnerabilities_table_name + '(id int, source varchar(255), source_word varchar(255), sink varchar(255), sink_word varchar(255));') - -def quote(item): - if isinstance(item, Node): - item 
= item.label - return "'" + item.replace("'", "''") + "'" - -def insert_vulnerability(vulnerability): - with open(database_file_name, 'a') as fd: - fd.write('\nINSERT INTO ' + vulnerabilities_table_name + '\n') - fd.write('VALUES (') - fd.write(quote(vulnerability.__dict__['source']) + ',') - fd.write(quote(vulnerability.__dict__['source_trigger_word']) + ',') - fd.write(quote(vulnerability.__dict__['sink']) + ',') - fd.write(quote(vulnerability.__dict__['sink_trigger_word'])) - fd.write(');') - -def insert_node(node): - with open(database_file_name, 'a') as fd: - fd.write('\nINSERT INTO ' + nodes_table_name + '\n') - fd.write('VALUES (') - fd.write("'" + node.__dict__['label'].replace("'", "''") + "'" + ',') - line_number = node.__dict__['line_number'] - if line_number: - fd.write(str(line_number) + ',') - else: - fd.write('NULL,') - path = node.__dict__['path'] - if path: - fd.write("'" + path.replace("'", "''") + "'") - else: - fd.write('NULL') - fd.write(');') - -def create_database(cfg_list, vulnerabilities): - create_nodes_table() - for cfg in cfg_list: - for node in cfg.nodes: - insert_node(node) - create_vulnerabilities_table() - for vulnerability in vulnerabilities: - insert_vulnerability(vulnerability) - - -class Output(): - filename_prefix = None - - def __init__(self, title): - if Output.filename_prefix: - self.title = Output.filename_prefix + '_' + title - else: - self.title = title - - def __enter__(self): - self.fd = open(self.title, 'w') - return self.fd - - def __exit__(self, type, value, traceback): - self.fd.close() - - -def def_use_chain_to_file(cfg_list): - with Output('def-use_chain.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### Def-use chain for CFG {} #####{}' - .format(i, os.linesep)) - def_use = build_def_use_chain(cfg.nodes) - for k, v in def_use.items(): - fd.write('Def: {} -> Use: [{}]{}' - .format(k.label, - ', '.join([n.label for n in v]), - os.linesep)) - - -def use_def_chain_to_file(cfg_list): - with 
Output('use-def_chain.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### Use-def chain for CFG {} #####{}' - .format(i, os.linesep)) - def_use = build_use_def_chain(cfg.nodes) - for k, v in def_use.items(): - fd.write('Use: {} -> Def: [{}]{}' - .format(k.label, - ', '.join([n[1].label for n in v]), - os.linesep)) - - -def cfg_to_file(cfg_list): - with Output('control_flow_graph.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### CFG {} #####{}'.format(i, os.linesep)) - for i, node in enumerate(cfg.nodes): - fd.write('Node {}: {}{}'.format(i, node.label, os.linesep)) - - -def verbose_cfg_to_file(cfg_list): - with Output('verbose_control_flow_graph.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### CFG {} #####{}'.format(i, os.linesep)) - for i, node in enumerate(cfg.nodes): - fd.write('Node {}: {}{}'.format(i, repr(node), os.linesep)) - - -def lattice_to_file(cfg_list, analysis_type): - with Output('lattice.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### Lattice for CFG {} #####{}'.format(i, os.linesep)) - l = Lattice(cfg.nodes, analysis_type) - - fd.write('# Elements to bitvector #{}'.format(os.linesep)) - for k, v in l.el2bv.items(): - fd.write('{} -> {}{}'.format(str(k), bin(v), os.linesep)) - - fd.write('# Bitvector to elements #{}'.format(os.linesep)) - for k, v in l.el2bv.items(): - fd.write('{} -> {}{}'.format(bin(v), str(k), os.linesep)) - - -def vulnerabilities_to_file(vulnerabilities): - with Output('vulnerabilities.pyt') as fd: - text.report(vulnerabilities, fd) - - -def save_repo_scan(repo, entry_path, vulnerabilities, error=None): - with open('scan.pyt', 'a') as fd: - fd.write('{}{}'.format(repo.name, os.linesep)) - fd.write('{}{}'.format(repo.url, os.linesep)) - fd.write('Entry file: {}{}'.format(entry_path, os.linesep)) - fd.write('Scanned: {}{}'.format(datetime.now(), os.linesep)) - if vulnerabilities: - text.report(vulnerabilities, fd) - else: - fd.write('No vulnerabilities 
found.{}'.format(os.linesep)) - if error: - fd.write('An Error occurred while scanning the repo: {}' - .format(str(error))) - fd.write(os.linesep) - fd.write(os.linesep) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index ba7debff..ca4c1ca6 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -501,7 +501,6 @@ def find_vulnerabilities_in_cfg( def find_vulnerabilities( cfg_list, - analysis_type, ui_mode, vulnerability_files ): @@ -509,7 +508,6 @@ def find_vulnerabilities( Args: cfg_list(list[CFG]): the list of CFGs to scan. - analysis_type(AnalysisBase): analysis object used to create lattice. ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. vulnerability_files(VulnerabilityFiles): contains trigger words and blackbox_mapping files @@ -525,7 +523,7 @@ def find_vulnerabilities( find_vulnerabilities_in_cfg( cfg, definitions, - Lattice(cfg.nodes, analysis_type), + Lattice(cfg.nodes), ui_mode, blackbox_mapping, vulnerabilities diff --git a/tests/analysis_base_test_case.py b/tests/analysis_base_test_case.py index cdd33182..8e8bb37d 100644 --- a/tests/analysis_base_test_case.py +++ b/tests/analysis_base_test_case.py @@ -1,4 +1,3 @@ -import unittest from collections import namedtuple from .base_test_case import BaseTestCase @@ -15,6 +14,7 @@ class AnalysisBaseTestCase(BaseTestCase): 'element' ) ) + def setUp(self): self.cfg = None @@ -29,21 +29,32 @@ def assertInCfg(self, connections, lattice): lattice(Lattice): The lattice we're analysing. 
""" for connection in connections: - self.assertEqual(lattice.in_constraint(self.cfg.nodes[connection[0]], self.cfg.nodes[connection[1]]), True, str(connection) + " expected to be connected") + self.assertEqual(lattice.in_constraint( + self.cfg.nodes[connection[0]], + self.cfg.nodes[connection[1]]), + True, + str(connection) + " expected to be connected") nodes = len(self.cfg.nodes) for element in range(nodes): for sets in range(nodes): if (element, sets) not in connections: - self.assertEqual(lattice.in_constraint(self.cfg.nodes[element], self.cfg.nodes[sets]), False, "(%s,%s)" % (self.cfg.nodes[element], self.cfg.nodes[sets]) + " expected to be disconnected") + self.assertEqual( + lattice.in_constraint( + self.cfg.nodes[element], + self.cfg.nodes[sets] + ), + False, + "(%s,%s)" % (self.cfg.nodes[element], self.cfg.nodes[sets]) + " expected to be disconnected" + ) def constraints(self, list_of_constraints, node_number): for c in list_of_constraints: - yield (c,node_number) + yield (c, node_number) def run_analysis(self, path, analysis_type): self.cfg_create_from_file(path) initialize_constraint_table([self.cfg]) self.analysis = FixedPointAnalysis(self.cfg, analysis_type) self.analysis.fixpoint_runner() - return Lattice(self.cfg.nodes, analysis_type) + return Lattice(self.cfg.nodes) diff --git a/tests/command_line_test.py b/tests/command_line_test.py index 9ee97067..6dbd79f2 100644 --- a/tests/command_line_test.py +++ b/tests/command_line_test.py @@ -6,6 +6,7 @@ from .base_test_case import BaseTestCase from pyt.__main__ import parse_args + @contextmanager def capture_sys_output(): capture_out, capture_err = StringIO(), StringIO() @@ -16,6 +17,7 @@ def capture_sys_output(): finally: sys.stdout, sys.stderr = current_out, current_err + class CommandLineTest(BaseTestCase): def test_no_args(self): with self.assertRaises(SystemExit): @@ -23,12 +25,10 @@ def test_no_args(self): parse_args([]) EXPECTED = """usage: python -m pyt [-h] (-f FILEPATH | -gr GIT_REPOS) [-pr 
PROJECT_ROOT] - [-d] [-o OUTPUT_FILENAME] [-csv CSV_PATH] - [-t TRIGGER_WORD_FILE] [-m BLACKBOX_MAPPING_FILE] [-py2] - [-l LOG_LEVEL] [-a ADAPTOR] [-db] - [-dl DRAW_LATTICE [DRAW_LATTICE ...]] [-j] [-ppm] - [-b BASELINE] [-p | -vp | -trim | -i] [-li | -re | -rt] - {save,github_search} ...\n""" + \ + [-csv CSV_PATH] [-t TRIGGER_WORD_FILE] + [-m BLACKBOX_MAPPING_FILE] [-py2] [-l LOG_LEVEL] + [-a ADAPTOR] [-j] [-b BASELINE] [-trim | -i] + {github_search} ...\n""" + \ "python -m pyt: error: one of the arguments " + \ "-f/--filepath -gr/--git-repos is required\n" self.assertEqual(stderr.getvalue(), EXPECTED) diff --git a/tests/github_search_test.py b/tests/github_search_test.py index f5797f39..be1539b1 100644 --- a/tests/github_search_test.py +++ b/tests/github_search_test.py @@ -5,7 +5,17 @@ class GetDatesTest(unittest.TestCase): - def test_range_shorter_than_interval(self): - date_range = get_dates(date(2016,12,12), date(2016,12,13), 7) - - + def test_get_dates(self): + date_ranges = get_dates( + date(2018, 1, 1), + date(2018, 1, 31) + ) + EXPECTED_RANGE = ( + ('2018-01-01', '2018-01-08'), + ('2018-01-08', '2018-01-15'), + ('2018-01-15', '2018-01-22'), + ('2018-01-22', '2018-01-29') + ) + for date_range, expected_range in zip(date_ranges, EXPECTED_RANGE): + for date_, expected_date in zip(date_range, expected_range): + assert str(date_) == expected_date diff --git a/tests/lattice_test.py b/tests/lattice_test.py deleted file mode 100644 index 5164c8ac..00000000 --- a/tests/lattice_test.py +++ /dev/null @@ -1,143 +0,0 @@ -from .base_test_case import BaseTestCase -from pyt.constraint_table import constraint_table -from pyt.lattice import Lattice -from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis - - -class LatticeTest(BaseTestCase): - - class AnalysisType: - @staticmethod - def get_lattice_elements(cfg_nodes): - for node in cfg_nodes: - if node.lattice_element == True: - yield node - def equality(self, value): - return self.value == value - - 
class Node: - def __init__(self, value, lattice_element): - self.value = value - self.lattice_element = lattice_element - def __str__(self): - return str(self.value) - - def test_generate_integer_elements(self): - one = self.Node(1, True) - two = self.Node(2, True) - three = self.Node(3, True) - a = self.Node('a', False) - b = self.Node('b', False) - c = self.Node('c', False) - cfg_nodes = [one, two, three, a, b, c] - lattice = Lattice(cfg_nodes, self.AnalysisType) - - self.assertEqual(lattice.el2bv[one], 0b1) - self.assertEqual(lattice.el2bv[two], 0b10) - self.assertEqual(lattice.el2bv[three], 0b100) - - self.assertEqual(lattice.bv2el[0], three) - self.assertEqual(lattice.bv2el[1], two) - self.assertEqual(lattice.bv2el[2], one) - - def test_join(self): - # join not used at the moment - return - - a = self.Node('x = 1', True) - b = self.Node('print(x)', False) - c = self.Node('x = 3', True) - d = self.Node('y = x', True) - - lattice = Lattice([a, c, d], self.AnalysisType) - - # Constraint results after fixpoint: - lattice.table[a] = 0b0001 - lattice.table[b] = 0b0001 - lattice.table[c] = 0b0010 - lattice.table[d] = 0b1010 - - r = lattice.join([a,c], [c]) - self.assertEqual(r, 0b11) - r = lattice.join([a, c], [d, c]) - self.assertEqual(r, 0b1011) - r = lattice.join([a], [c]) - self.assertEqual(r, 0b11) - r = lattice.join([c], [d]) - self.assertEqual(r, 0b1010) - r = lattice.join([], [a]) - self.assertEqual(r, 0b1) - r = lattice.join([a,c,d], [a,c,d]) - self.assertEqual(r, 0b1011) - r = lattice.join([d,c], []) - self.assertEqual(r, 0b1010) - - def test_meet(self): - # meet not used on lattice atm - return - - a = self.Node('x = 1', True) - b = self.Node('print(x)', False) - c = self.Node('x = 3', True) - d = self.Node('y = x', True) - - lattice = Lattice([a, c, d], self.AnalysisType) - - # Constraint results after fixpoint: - lattice.table[a] = 0b0001 - lattice.table[b] = 0b0001 - lattice.table[c] = 0b0010 - lattice.table[d] = 0b1010 - - r = lattice.meet([a,c], 
[c,d]) - self.assertEqual(r, 0b10) - r = lattice.meet([a], [d]) - self.assertEqual(r, 0b0) - r = lattice.meet([a,c,d], [a,c]) - self.assertEqual(r, 0b011) - r = lattice.meet([c,d], [a,d]) - self.assertEqual(r, 0b1010) - r = lattice.meet([], []) - self.assertEqual(r, 0b0) - r = lattice.meet([a], []) - self.assertEqual(r, 0b0) - - def test_in_constraint(self): - a = self.Node('x = 1', True) - b = self.Node('print(x)', False) - c = self.Node('x = 3', True) - d = self.Node('y = x', True) - - lattice = Lattice([a, c, d], self.AnalysisType) - - constraint_table[a] = 0b001 - constraint_table[b] = 0b001 - constraint_table[c] = 0b010 - constraint_table[d] = 0b110 - - self.assertEqual(lattice.in_constraint(a, b), True) - self.assertEqual(lattice.in_constraint(a, a), True) - self.assertEqual(lattice.in_constraint(a, d), False) - self.assertEqual(lattice.in_constraint(a, c), False) - self.assertEqual(lattice.in_constraint(c, d), True) - self.assertEqual(lattice.in_constraint(d, d), True) - self.assertEqual(lattice.in_constraint(c, c), True) - self.assertEqual(lattice.in_constraint(c, a), False) - self.assertEqual(lattice.in_constraint(c, b), False) - - def test_get_elements(self): - a = self.Node('x = 1', True) - b = self.Node('print(x)', False) - c = self.Node('x = 3', True) - d = self.Node('y = x', True) - - lattice = Lattice([a, c, d], self.AnalysisType) - - self.assertEqual(set(lattice.get_elements(0b111)), {a,c,d}) - self.assertEqual(set(lattice.get_elements(0b0)), set()) - self.assertEqual(set(lattice.get_elements(0b1)), {a}) - self.assertEqual(set(lattice.get_elements(0b10)), {c}) - self.assertEqual(set(lattice.get_elements(0b100)), {d}) - self.assertEqual(set(lattice.get_elements(0b11)), {a,c}) - self.assertEqual(set(lattice.get_elements(0b101)), {a,d}) - self.assertEqual(set(lattice.get_elements(0b110)), {c,d}) diff --git a/tests/liveness_test.py b/tests/liveness_test.py index da6f08a0..e69de29b 100644 --- a/tests/liveness_test.py +++ b/tests/liveness_test.py @@ -1,33 
+0,0 @@ -from .analysis_base_test_case import AnalysisBaseTestCase -from pyt.constraint_table import constraint_table -from pyt.liveness import LivenessAnalysis - - -class LivenessTest(AnalysisBaseTestCase): - def test_example(self): - lattice = self.run_analysis('examples/example_inputs/example.py', LivenessAnalysis) - - x = 0b1 # 1 - y = 0b10 # 2 - z = 0b100 # 4 - - lattice.el2bv['x'] = x - lattice.el2bv['y'] = y - lattice.el2bv['z'] = z - - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[0]]), []) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[1]]), []) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[2]]), []) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[3]]), ['x']) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[4]]), ['x']) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[5]]), ['x']) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[6]]), ['x']) - self.assertEqual(set(lattice.get_elements(constraint_table[self.cfg.nodes[7]])), set(['x','y'])) - self.assertEqual(set(lattice.get_elements(constraint_table[self.cfg.nodes[8]])), set(['x','y'])) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[9]]), ['x']) - self.assertEqual(set(lattice.get_elements(constraint_table[self.cfg.nodes[10]])), set(['x','z'])) - self.assertEqual(set(lattice.get_elements(constraint_table[self.cfg.nodes[11]])), set(['x','z'])) - self.assertEqual(set(lattice.get_elements(constraint_table[self.cfg.nodes[12]])), set(['x','z'])) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[13]]), ['x']) - self.assertEqual(lattice.get_elements(constraint_table[self.cfg.nodes[14]]), []) - self.assertEqual(len(lattice.el2bv), 3) diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index 7492aee2..4bbcde74 100644 --- 
a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -35,7 +35,6 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, - ReachingDefinitionsTaintAnalysis, UImode.NORMAL, VulnerabilityFiles( default_blackbox_mapping_file, @@ -151,9 +150,10 @@ def test_sink_with_result_of_blackbox_nested(self): ~call_3 = ret_subprocess.call(result, shell=True) This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) - or - self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertTrue( + self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) or + self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION) + ) def test_sink_with_result_of_user_defined_nested(self): vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_result_of_user_defined_nested.py') @@ -233,9 +233,10 @@ def test_sink_with_blackbox_inner(self): ~call_1 = ret_subprocess.call(~call_2, shell=True) This vulnerability is unknown due to: Label: ~call_3 = ret_scrypt.encypt(req_param) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) - or - self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertTrue( + self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) or + self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION) + ) def test_sink_with_user_defined_inner(self): vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_user_defined_inner.py') diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index f3d77279..d85acdad 100644 --- 
a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -129,11 +129,13 @@ def run_analysis(self, path): FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) initialize_constraint_table(cfg_list) - analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + analyse( + cfg_list, + analysis_type=ReachingDefinitionsTaintAnalysis + ) return vulnerabilities.find_vulnerabilities( cfg_list, - ReachingDefinitionsTaintAnalysis, UImode.NORMAL, VulnerabilityFiles( default_blackbox_mapping_file, @@ -518,7 +520,10 @@ def run_analysis(self, path): FrameworkAdaptor(cfg_list, [], [], is_django_view_function) initialize_constraint_table(cfg_list) - analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + analyse( + cfg_list, + analysis_type=ReachingDefinitionsTaintAnalysis + ) trigger_word_file = os.path.join( 'pyt', @@ -528,7 +533,6 @@ def run_analysis(self, path): return vulnerabilities.find_vulnerabilities( cfg_list, - ReachingDefinitionsTaintAnalysis, UImode.NORMAL, VulnerabilityFiles( default_blackbox_mapping_file, @@ -566,7 +570,10 @@ def run_analysis(self, path): FrameworkAdaptor(cfg_list, [], [], is_function) initialize_constraint_table(cfg_list) - analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + analyse( + cfg_list, + analysis_type=ReachingDefinitionsTaintAnalysis + ) trigger_word_file = os.path.join( 'pyt', @@ -576,7 +583,6 @@ def run_analysis(self, path): return vulnerabilities.find_vulnerabilities( cfg_list, - ReachingDefinitionsTaintAnalysis, UImode.NORMAL, VulnerabilityFiles( default_blackbox_mapping_file, diff --git a/tox.ini b/tox.ini index 70393146..933a1460 100644 --- a/tox.ini +++ b/tox.ini @@ -7,5 +7,5 @@ deps = -rrequirements-dev.txt commands = coverage erase coverage run tests - coverage report --show-missing --fail-under 89 + coverage report --show-missing --fail-under 88 pre-commit run From bbb6c2e1bc1ce7b211138edf434efc1fb0cc15db Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 
22:45:45 -0700 Subject: [PATCH 018/291] made github_search the only thing that imports repo_runner --- pyt/__main__.py | 31 ++++++--------------- pyt/github_search.py | 65 ++++++++++++++++++++++++++++++++------------ 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 0d23bebc..fac79881 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -29,7 +29,7 @@ is_function_without_leading_ ) from .github_search import ( - analyse_repo, + analyse_repos, scan_github, set_github_api_token ) @@ -38,7 +38,6 @@ get_modules ) from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis -from .repo_runner import get_repos from .vulnerabilities import find_vulnerabilities @@ -161,30 +160,16 @@ def main(command_line_args=sys.argv[1:]): cfg_list = list() if args.git_repos: - repos = get_repos(args.git_repos) - for repo in repos: - repo.clone() - vulnerabilities = analyse_repo( - args, - repo, - ui_mode - ) - if args.json: - json.report(vulnerabilities, sys.stdout) - else: - text.report(vulnerabilities, sys.stdout) - if not vulnerabilities: - repo.clean_up() + analyse_repos( + args, + ui_mode + ) exit() - - if args.which == 'search': + elif args.which == 'search': set_github_api_token() scan_github( - args.search_string, - args.start_date, - args.csv_path, - ui_mode, - args + args, + ui_mode ) exit() diff --git a/pyt/github_search.py b/pyt/github_search.py index 0dc1e953..e6945b28 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -1,23 +1,37 @@ import os import re import requests +import sys import time -from abc import ABCMeta, abstractmethod -from datetime import date, datetime, timedelta - +from abc import ( + ABCMeta, + abstractmethod +) +from datetime import ( + date, + datetime, + timedelta +) from . 
import repo_runner from .argument_helpers import VulnerabilityFiles from .ast_helper import generate_ast from .constraint_table import initialize_constraint_table from .expr_visitor import make_cfg from .fixed_point import analyse -from .formatters import json +from .formatters import ( + json, + text +) from .project_handler import ( get_directory_modules, get_modules ) from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis -from .repo_runner import add_repo_to_csv, NoEntryPathError +from .repo_runner import ( + add_repo_to_csv, + get_repos, + NoEntryPathError +) from .vulnerabilities import find_vulnerabilities @@ -220,7 +234,6 @@ def analyse_repo( github_repo, ui_mode ): - cfg_list = list() directory = os.path.dirname(github_repo.path) project_modules = get_modules(directory) local_modules = get_directory_modules(directory) @@ -231,7 +244,7 @@ def analyse_repo( local_modules, github_repo.path ) - cfg_list.append(cfg) + cfg_list = list(cfg) initialize_constraint_table(cfg_list) analyse( @@ -250,16 +263,13 @@ def analyse_repo( def scan_github( - search_string, - start_date, - csv_path, - ui_mode, - other_args + cmd_line_args, + ui_mode ): - for range_start, range_end in get_dates(start_date): + for range_start, range_end in get_dates(cmd_line_args.start_date): query = Query( SEARCH_REPO_URL, - search_string, + cmd_line_args.search_string, time_interval='{} .. 
{}'.format( range_start, range_end @@ -282,12 +292,33 @@ def scan_github( print('NoEntryPathError for {}'.format(repo.url)) continue vulnerabilities = analyse_repo( - other_args, + cmd_line_args, repo, ui_mode ) with open(repo.path + '.pyt', 'a') as fd: - json.report(vulnerabilities, fd) + if cmd_line_args.json: + json.report(vulnerabilities, fd) + else: + text.report(vulnerabilities, fd) + if vulnerabilities: - add_repo_to_csv(csv_path, repo) + add_repo_to_csv(cmd_line_args.csv_path, repo) repo.clean_up() + + +def analyse_repos(cmd_line_args, ui_mode): + repos = get_repos(cmd_line_args.git_repos) + for repo in repos: + repo.clone() + vulnerabilities = analyse_repo( + cmd_line_args, + repo, + ui_mode + ) + if cmd_line_args.json: + json.report(vulnerabilities, sys.stdout) + else: + text.report(vulnerabilities, sys.stdout) + if not vulnerabilities: + repo.clean_up() From cc3e10e12f2763b041d87792169c5d687458c34b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 23:04:33 -0700 Subject: [PATCH 019/291] Trim reaching definitions, and definition chains --- .coveragerc | 1 - pyt/definition_chains.py | 64 ++++++------------------------ pyt/reaching_definitions.py | 23 ----------- pyt/reaching_definitions_base.py | 29 -------------- pyt/reaching_definitions_taint.py | 31 +++++++++++++-- pyt/vulnerabilities.py | 5 ++- tests/reaching_definitions_test.py | 50 ----------------------- 7 files changed, 44 insertions(+), 159 deletions(-) delete mode 100644 pyt/reaching_definitions.py delete mode 100644 pyt/reaching_definitions_base.py delete mode 100644 tests/reaching_definitions_test.py diff --git a/.coveragerc b/.coveragerc index 37429140..48d1154f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -12,7 +12,6 @@ exclude_lines = source = ./pyt omit = pyt/__main__.py - pyt/definition_chains.py pyt/formatters/json.py pyt/formatters/text.py pyt/github_search.py diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index ad63871b..b69da7c3 100644 --- 
a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -1,73 +1,33 @@ -import ast +from collections import defaultdict from .constraint_table import constraint_table -from .lattice import Lattice from .node_types import AssignmentNode -from .vars_visitor import VarsVisitor -def get_vars(node): - vv = VarsVisitor() - if isinstance(node.ast_node, (ast.If, ast.While)): - vv.visit(node.ast_node.test) - elif isinstance(node.ast_node, (ast.ClassDef, ast.FunctionDef)): - return set() - else: - try: - vv.visit(node.ast_node) - except AttributeError: # If no ast_node - vv.result = list() - - vv.result = set(vv.result) - - # Filter out lvars: - for var in vv.result: - try: - if var in node.right_hand_side_variables: - yield var - except AttributeError: - yield var - - -def get_constraint_nodes(node, lattice): +def get_constraint_nodes( + node, + lattice +): for n in lattice.get_elements(constraint_table[node]): if n is not node: yield n -def build_use_def_chain(cfg_nodes): - use_def = dict() - lattice = Lattice(cfg_nodes) - - for node in cfg_nodes: - definitions = list() - for constraint_node in get_constraint_nodes(node, lattice): - for var in get_vars(node): - if var in constraint_node.left_hand_side: - definitions.append((var, constraint_node)) - use_def[node] = definitions - - return use_def - - -def build_def_use_chain(cfg_nodes): - def_use = dict() - lattice = Lattice(cfg_nodes) - +def build_def_use_chain( + cfg_nodes, + lattice +): + def_use = defaultdict(list) # For every node for node in cfg_nodes: # That's a definition if isinstance(node, AssignmentNode): - # Make an empty list for it in def_use dict - def_use[node] = list() - - # Get its uses + # Get the uses for variable in node.right_hand_side_variables: # Loop through most of the nodes before it for earlier_node in get_constraint_nodes(node, lattice): - # and add to the 'uses list' of each earlier node, when applicable + # and add them to the 'uses list' of each earlier node, when applicable # 'earlier 
node' here being a simplification if variable in earlier_node.left_hand_side: def_use[earlier_node].append(node) - return def_use diff --git a/pyt/reaching_definitions.py b/pyt/reaching_definitions.py deleted file mode 100644 index 299339d2..00000000 --- a/pyt/reaching_definitions.py +++ /dev/null @@ -1,23 +0,0 @@ -from .constraint_table import constraint_table -from .node_types import AssignmentNode -from .reaching_definitions_base import ReachingDefinitionsAnalysisBase - - -class ReachingDefinitionsAnalysis(ReachingDefinitionsAnalysisBase): - """Reaching definitions analysis rules implemented.""" - - def fixpointmethod(self, cfg_node): - """Regular reaching definitions. - Used in definition_chains.py - """ - JOIN = self.join(cfg_node) - # Assignment check - if isinstance(cfg_node, AssignmentNode): - arrow_result = JOIN - # Get previous assignments of cfg_node.left_hand_side and remove them from JOIN - arrow_result = self.arrow(JOIN, cfg_node.left_hand_side) - arrow_result = arrow_result | self.lattice.el2bv[cfg_node] - constraint_table[cfg_node] = arrow_result - # Default case - else: - constraint_table[cfg_node] = JOIN diff --git a/pyt/reaching_definitions_base.py b/pyt/reaching_definitions_base.py deleted file mode 100644 index f2dec1dd..00000000 --- a/pyt/reaching_definitions_base.py +++ /dev/null @@ -1,29 +0,0 @@ -from .constraint_table import constraint_join -from .lattice import Lattice - - -class ReachingDefinitionsAnalysisBase(): - """Reaching definitions analysis rules implemented.""" - - def __init__(self, cfg): - self.cfg = cfg - self.lattice = Lattice(cfg.nodes) - - def join(self, cfg_node): - """Joins all constraints of the ingoing nodes and returns them. - This represents the JOIN auxiliary definition from Schwartzbach.""" - return constraint_join(cfg_node.ingoing) - - def arrow(self, JOIN, _id): - """Removes all previous assignments from JOIN that have the same left hand side. 
- This represents the arrow id definition from Schwartzbach.""" - r = JOIN - for node in self.lattice.get_elements(JOIN): - if node.left_hand_side == _id: - r = r ^ self.lattice.el2bv[node] - return r - - def dep(self, q_1): - """Represents the dep mapping from Schwartzbach.""" - for node in q_1.outgoing: - yield node diff --git a/pyt/reaching_definitions_taint.py b/pyt/reaching_definitions_taint.py index b2c2e1d7..c334d187 100644 --- a/pyt/reaching_definitions_taint.py +++ b/pyt/reaching_definitions_taint.py @@ -1,9 +1,15 @@ -from .constraint_table import constraint_table +from .constraint_table import ( + constraint_join, + constraint_table +) +from .lattice import Lattice from .node_types import AssignmentNode -from .reaching_definitions_base import ReachingDefinitionsAnalysisBase -class ReachingDefinitionsTaintAnalysis(ReachingDefinitionsAnalysisBase): +class ReachingDefinitionsTaintAnalysis(): + def __init__(self, cfg): + self.cfg = cfg + self.lattice = Lattice(cfg.nodes) def fixpointmethod(self, cfg_node): """The most important part of PyT, where we perform @@ -24,3 +30,22 @@ def fixpointmethod(self, cfg_node): # Default case else: constraint_table[cfg_node] = JOIN + + def join(self, cfg_node): + """Joins all constraints of the ingoing nodes and returns them. + This represents the JOIN auxiliary definition from Schwartzbach.""" + return constraint_join(cfg_node.ingoing) + + def arrow(self, JOIN, _id): + """Removes all previous assignments from JOIN that have the same left hand side. 
+ This represents the arrow id definition from Schwartzbach.""" + r = JOIN + for node in self.lattice.get_elements(JOIN): + if node.left_hand_side == _id: + r = r ^ self.lattice.el2bv[node] + return r + + def dep(self, q_1): + """Represents the dep mapping from Schwartzbach.""" + for node in q_1.outgoing: + yield node diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index ca4c1ca6..d3f7e629 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -434,7 +434,10 @@ def get_vulnerability( elif isinstance(cfg_node, IfNode): potential_sanitiser = cfg_node - def_use = build_def_use_chain(cfg.nodes) + def_use = build_def_use_chain( + cfg.nodes, + lattice + ) for chain in get_vulnerability_chains( source.cfg_node, sink.cfg_node, diff --git a/tests/reaching_definitions_test.py b/tests/reaching_definitions_test.py deleted file mode 100644 index 75524893..00000000 --- a/tests/reaching_definitions_test.py +++ /dev/null @@ -1,50 +0,0 @@ -from .analysis_base_test_case import AnalysisBaseTestCase -from pyt.constraint_table import constraint_table -from pyt.reaching_definitions import ReachingDefinitionsAnalysis - - -class ReachingDefinitionsTest(AnalysisBaseTestCase): - def test_linear_program(self): - constraint_table = {} - lattice = self.run_analysis('examples/example_inputs/linear.py', ReachingDefinitionsAnalysis) - - EXPECTED = [ - "Label: Entry module: ", - "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", - "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: y = x - 1: Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - ] - i = 0 - for k, v in constraint_table.items(): - row = str(k) + ': ' + ','.join([str(n) for n in 
lattice.get_elements(v)]) - self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) - i = i + 1 - - def test_example(self): - constraint_table = {} - lattice = self.run_analysis('examples/example_inputs/example.py', ReachingDefinitionsAnalysis) - - EXPECTED = [ - "Label: Entry module: ", - "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", - "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_int(x): Label: ~call_2 = ret_int(x), Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: x = ~call_2: Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: while x > 1:: Label: z = z - 1, Label: x = x / 2, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: y = x / 2: Label: z = z - 1, Label: x = x / 2, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: if y > 3:: Label: z = z - 1, Label: x = x / 2, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: x = x - y: Label: z = z - 1, Label: x = x - y, Label: y = x / 2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: z = x - 4: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: if z > 0:: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: x = x / 2: Label: x = x / 2, Label: z = x - 4, Label: y = x / 2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: z = z - 1: Label: z = z - 1, Label: x = x / 2, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: ~call_3 = ret_print(x): Label: 
~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - ] - i = 0 - for k, v in constraint_table.items(): - row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) - self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) - i = i + 1 From eebff478618896c0d192e1908e58024bf8023a51 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 23:21:18 -0700 Subject: [PATCH 020/291] Made fixed point only accept reaching_definitions_taint_test --- pyt/fixed_point.py | 9 +- tests/analysis_base_test_case.py | 4 +- tests/reaching_definitions_taint_test.py | 146 ++++++++++----------- tests/vulnerabilities_across_files_test.py | 3 +- tests/vulnerabilities_test.py | 16 +-- 5 files changed, 81 insertions(+), 97 deletions(-) diff --git a/pyt/fixed_point.py b/pyt/fixed_point.py index de148518..e77086ce 100644 --- a/pyt/fixed_point.py +++ b/pyt/fixed_point.py @@ -1,16 +1,17 @@ """This module implements the fixed point algorithm.""" from .constraint_table import constraint_table +from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis class FixedPointAnalysis(): """Run the fix point analysis.""" - def __init__(self, cfg, analysis): + def __init__(self, cfg): """Fixed point analysis. 
Analysis must be a dataflow analysis containing a 'fixpointmethod' method that analyses one CFG.""" - self.analysis = analysis(cfg) + self.analysis = ReachingDefinitionsTaintAnalysis(cfg) self.cfg = cfg def fixpoint_runner(self): @@ -29,8 +30,8 @@ def fixpoint_runner(self): q = q[1:] # q = q.tail() # The list minus the head -def analyse(cfg_list, *, analysis_type): +def analyse(cfg_list): """Analyse a list of control flow graphs with a given analysis type.""" for cfg in cfg_list: - analysis = FixedPointAnalysis(cfg, analysis_type) + analysis = FixedPointAnalysis(cfg) analysis.fixpoint_runner() diff --git a/tests/analysis_base_test_case.py b/tests/analysis_base_test_case.py index 8e8bb37d..9bf5874b 100644 --- a/tests/analysis_base_test_case.py +++ b/tests/analysis_base_test_case.py @@ -52,9 +52,9 @@ def constraints(self, list_of_constraints, node_number): for c in list_of_constraints: yield (c, node_number) - def run_analysis(self, path, analysis_type): + def run_analysis(self, path): self.cfg_create_from_file(path) initialize_constraint_table([self.cfg]) - self.analysis = FixedPointAnalysis(self.cfg, analysis_type) + self.analysis = FixedPointAnalysis(self.cfg) self.analysis.fixpoint_runner() return Lattice(self.cfg.nodes) diff --git a/tests/reaching_definitions_taint_test.py b/tests/reaching_definitions_taint_test.py index dc18dcd5..e2d437a8 100644 --- a/tests/reaching_definitions_taint_test.py +++ b/tests/reaching_definitions_taint_test.py @@ -1,111 +1,105 @@ from .analysis_base_test_case import AnalysisBaseTestCase -from pyt.constraint_table import constraint_table -from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis class ReachingDefinitionsTaintTest(AnalysisBaseTestCase): # Note: the numbers in the test represent the line numbers of the assignments in the program. 
def test_linear_program(self): constraint_table = {} - lattice = self.run_analysis('examples/example_inputs/linear.py', ReachingDefinitionsTaintAnalysis) + lattice = self.run_analysis('examples/example_inputs/linear.py') EXPECTED = [ - "Label: Entry module:", - "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", - "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: y = x - 1: Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()" - ] + "Label: Entry module:", + "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", + "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: y = x - 1: Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()" + ] i = 0 for k, v in constraint_table.items(): - row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) - self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) - i = i + 1 - + row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) + self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) + i = i + 1 def test_if_program(self): constraint_table = {} - lattice = self.run_analysis('examples/example_inputs/if_program.py', ReachingDefinitionsTaintAnalysis) + lattice = self.run_analysis('examples/example_inputs/if_program.py') EXPECTED = [ - "Label: Entry module:", - "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", - "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = 
ret_input()", - "Label: if x > 0:: Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: y = x + 1: Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()" - ] + "Label: Entry module:", + "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", + "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: if x > 0:: Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: y = x + 1: Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()" + ] i = 0 for k, v in constraint_table.items(): - row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) - self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) - i = i + 1 + row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) + self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) + i = i + 1 def test_example(self): constraint_table = {} - lattice = self.run_analysis('examples/example_inputs/example.py', ReachingDefinitionsTaintAnalysis) + lattice = self.run_analysis('examples/example_inputs/example.py') EXPECTED = [ - "Label: Entry module:", - "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", - "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_int(x): Label: ~call_2 = ret_int(x), Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: x = ~call_2: Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: while x > 1:: 
Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: y = x / 2: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: if y > 3:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: x = x - y: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: z = x - 4: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: if z > 0:: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: x = x / 2: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: z = z - 1: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()" - ] + "Label: Entry module:", + "Label: ~call_1 = ret_input(): Label: ~call_1 = 
ret_input()", + "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: ~call_2 = ret_int(x): Label: ~call_2 = ret_int(x), Label: x = ~call_1, Label: ~call_1 = ret_input()", + "Label: x = ~call_2: Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: while x > 1:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: y = x / 2: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: if y > 3:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: x = x - y: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: z = x - 4: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: if z > 0:: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: x = x / 2: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: z = z - 1: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", + "Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 
= ret_input()", + "Label: Exit module: Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()" + ] i = 0 for k, v in constraint_table.items(): - row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) - self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) - i = i + 1 + row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) + self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) + i = i + 1 def test_func_with_params(self): - lattice = self.run_analysis('examples/example_inputs/function_with_params.py', ReachingDefinitionsTaintAnalysis) + lattice = self.run_analysis('examples/example_inputs/function_with_params.py') - self.assertInCfg([(1,1), - (1,2), (2,2), - (1,3), (2,3), (3,3), - (1,4), (2,4), (3,4), (4,4), - (1,5), (2,5), (3,5), (4,5), - *self.constraints([1,2,3,4,6], 6), - *self.constraints([1,2,3,4,6,7], 7), - *self.constraints([1,2,3,4,6,7], 8), - *self.constraints([2,3,4,6,7,9], 9), - *self.constraints([2,3,4,6,7,9], 10)], lattice) + self.assertInCfg([(1, 1), + (1, 2), (2, 2), + (1, 3), (2, 3), (3, 3), + (1, 4), (2, 4), (3, 4), (4, 4), + (1, 5), (2, 5), (3, 5), (4, 5), + *self.constraints([1, 2, 3, 4, 6], 6), + *self.constraints([1, 2, 3, 4, 6, 7], 7), + *self.constraints([1, 2, 3, 4, 6, 7], 8), + *self.constraints([2, 3, 4, 6, 7, 9], 9), + *self.constraints([2, 3, 4, 6, 7, 9], 10)], lattice) def test_while(self): constraint_table = {} - lattice = self.run_analysis('examples/example_inputs/while.py', ReachingDefinitionsTaintAnalysis) + lattice = self.run_analysis('examples/example_inputs/while.py') EXPECTED = [ - "Label: Entry module: ", - "Label: ~call_2 = ret_input(): Label: ~call_2 = ret_input()", - "Label: ~call_1 = ret_int(~call_2): Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = 
ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: while x < 10:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input(", - "Label: x = x + 1: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: if x == 5:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: BreakNode: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: x = 6: Label: x = 6, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: Exit module: Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()" - ] + "Label: Entry module: ", + "Label: ~call_2 = ret_input(): Label: ~call_2 = ret_input()", + "Label: ~call_1 = ret_int(~call_2): Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: while x < 10:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input(", + "Label: x = x + 1: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: if x == 5:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: BreakNode: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: x = 6: Label: x = 6, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: x = 6, Label: 
x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + "Label: Exit module: Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()" + ] i = 0 for k, v in constraint_table.items(): - row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) - self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) - i = i + 1 - - def test_join(self): - pass + row = str(k) + ': ' + ','.join([str(n) for n in lattice.get_elements(v)]) + self.assertTrue(self.string_compare_alnum(row, EXPECTED[i])) + i = i + 1 diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index 4bbcde74..a9e1b02c 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -12,7 +12,6 @@ from pyt.framework_adaptor import FrameworkAdaptor from pyt.framework_helper import is_flask_route_function from pyt.project_handler import get_directory_modules, get_modules -from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from pyt.vulnerabilities import find_vulnerabilities @@ -31,7 +30,7 @@ def run_analysis(self, path): initialize_constraint_table(cfg_list) - analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + analyse(cfg_list) return find_vulnerabilities( cfg_list, diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index d85acdad..50470a14 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -21,7 +21,6 @@ is_function ) from pyt.node_types import Node -from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis class EngineTest(BaseTestCase): @@ -129,10 +128,7 @@ def run_analysis(self, path): FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) initialize_constraint_table(cfg_list) - analyse( - cfg_list, - analysis_type=ReachingDefinitionsTaintAnalysis - ) + 
analyse(cfg_list) return vulnerabilities.find_vulnerabilities( cfg_list, @@ -520,10 +516,7 @@ def run_analysis(self, path): FrameworkAdaptor(cfg_list, [], [], is_django_view_function) initialize_constraint_table(cfg_list) - analyse( - cfg_list, - analysis_type=ReachingDefinitionsTaintAnalysis - ) + analyse(cfg_list) trigger_word_file = os.path.join( 'pyt', @@ -570,10 +563,7 @@ def run_analysis(self, path): FrameworkAdaptor(cfg_list, [], [], is_function) initialize_constraint_table(cfg_list) - analyse( - cfg_list, - analysis_type=ReachingDefinitionsTaintAnalysis - ) + analyse(cfg_list) trigger_word_file = os.path.join( 'pyt', From 54d53c503d1db7bb1d90577943dd14401f9f4e5b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 23:35:04 -0700 Subject: [PATCH 021/291] Updated more analyse call-sites --- pyt/__main__.py | 6 +----- pyt/github_search.py | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index fac79881..d3556d92 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -37,7 +37,6 @@ get_directory_modules, get_modules ) -from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from .vulnerabilities import find_vulnerabilities @@ -208,10 +207,7 @@ def main(command_line_args=sys.argv[1:]): ) initialize_constraint_table(cfg_list) - analyse( - cfg_list, - ReachingDefinitionsTaintAnalysis - ) + analyse(cfg_list) vulnerabilities = find_vulnerabilities( cfg_list, ui_mode, diff --git a/pyt/github_search.py b/pyt/github_search.py index e6945b28..2fa1b2f5 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -26,7 +26,6 @@ get_directory_modules, get_modules ) -from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from .repo_runner import ( add_repo_to_csv, get_repos, @@ -247,10 +246,7 @@ def analyse_repo( cfg_list = list(cfg) initialize_constraint_table(cfg_list) - analyse( - cfg_list, - ReachingDefinitionsTaintAnalysis - ) + analyse(cfg_list) vulnerabilities = 
find_vulnerabilities( cfg_list, ui_mode, From 53e8ae0e1aca9eb006ef699ea47d862039a01c81 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 20 Apr 2018 23:59:03 -0700 Subject: [PATCH 022/291] Move analysis modules and tests into their own folders --- pyt/__main__.py | 4 ++-- pyt/{ => analysis}/constraint_table.py | 0 pyt/{ => analysis}/definition_chains.py | 2 +- pyt/{ => analysis}/fixed_point.py | 0 pyt/{ => analysis}/lattice.py | 2 +- .../reaching_definitions_taint.py | 2 +- pyt/github_search.py | 4 ++-- pyt/utils/log.py | 23 ------------------- pyt/vulnerabilities.py | 4 ++-- .../__init__.py} | 0 .../{ => analysis}/analysis_base_test_case.py | 8 +++---- .../reaching_definitions_taint_test.py | 0 tests/vulnerabilities_across_files_test.py | 4 ++-- tests/vulnerabilities_test.py | 4 ++-- 14 files changed, 17 insertions(+), 40 deletions(-) rename pyt/{ => analysis}/constraint_table.py (100%) rename pyt/{ => analysis}/definition_chains.py (96%) rename pyt/{ => analysis}/fixed_point.py (100%) rename pyt/{ => analysis}/lattice.py (97%) rename pyt/{ => analysis}/reaching_definitions_taint.py (97%) delete mode 100644 pyt/utils/log.py rename tests/{liveness_test.py => analysis/__init__.py} (100%) rename tests/{ => analysis}/analysis_base_test_case.py (90%) rename tests/{ => analysis}/reaching_definitions_taint_test.py (100%) diff --git a/pyt/__main__.py b/pyt/__main__.py index d3556d92..1378b9ea 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -5,6 +5,8 @@ import sys from datetime import date +from .analysis.constraint_table import initialize_constraint_table +from .analysis.fixed_point import analyse from .argument_helpers import ( default_blackbox_mapping_file, default_trigger_word_file, @@ -14,9 +16,7 @@ ) from .ast_helper import generate_ast from .baseline import get_vulnerabilities_not_in_baseline -from .constraint_table import initialize_constraint_table from .expr_visitor import make_cfg -from .fixed_point import analyse from .formatters import ( json, text 
diff --git a/pyt/constraint_table.py b/pyt/analysis/constraint_table.py similarity index 100% rename from pyt/constraint_table.py rename to pyt/analysis/constraint_table.py diff --git a/pyt/definition_chains.py b/pyt/analysis/definition_chains.py similarity index 96% rename from pyt/definition_chains.py rename to pyt/analysis/definition_chains.py index b69da7c3..e4c79c83 100644 --- a/pyt/definition_chains.py +++ b/pyt/analysis/definition_chains.py @@ -1,7 +1,7 @@ from collections import defaultdict from .constraint_table import constraint_table -from .node_types import AssignmentNode +from ..node_types import AssignmentNode def get_constraint_nodes( diff --git a/pyt/fixed_point.py b/pyt/analysis/fixed_point.py similarity index 100% rename from pyt/fixed_point.py rename to pyt/analysis/fixed_point.py diff --git a/pyt/lattice.py b/pyt/analysis/lattice.py similarity index 97% rename from pyt/lattice.py rename to pyt/analysis/lattice.py index f4dd531f..148237c2 100644 --- a/pyt/lattice.py +++ b/pyt/analysis/lattice.py @@ -1,5 +1,5 @@ from .constraint_table import constraint_table -from .node_types import AssignmentNode +from ..node_types import AssignmentNode def get_lattice_elements(cfg_nodes): diff --git a/pyt/reaching_definitions_taint.py b/pyt/analysis/reaching_definitions_taint.py similarity index 97% rename from pyt/reaching_definitions_taint.py rename to pyt/analysis/reaching_definitions_taint.py index c334d187..6ec26e01 100644 --- a/pyt/reaching_definitions_taint.py +++ b/pyt/analysis/reaching_definitions_taint.py @@ -3,7 +3,7 @@ constraint_table ) from .lattice import Lattice -from .node_types import AssignmentNode +from ..node_types import AssignmentNode class ReachingDefinitionsTaintAnalysis(): diff --git a/pyt/github_search.py b/pyt/github_search.py index 2fa1b2f5..9e11f219 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -13,11 +13,11 @@ timedelta ) from . 
import repo_runner +from .analysis.constraint_table import initialize_constraint_table +from .analysis.fixed_point import analyse from .argument_helpers import VulnerabilityFiles from .ast_helper import generate_ast -from .constraint_table import initialize_constraint_table from .expr_visitor import make_cfg -from .fixed_point import analyse from .formatters import ( json, text diff --git a/pyt/utils/log.py b/pyt/utils/log.py deleted file mode 100644 index 74a49f56..00000000 --- a/pyt/utils/log.py +++ /dev/null @@ -1,23 +0,0 @@ -import logging - - -LOGGING_FMT = '%(levelname)3s] %(filename)s::%(funcName)s(%(lineno)d) - %(message)s' - - -def remove_other_handlers(to_keep=None): - for hdl in logger.handlers: - if hdl != to_keep: - logger.removeHandler(hdl) - - -def enable_logger(to_file=None): - logger.setLevel(logging.DEBUG) - ch = logging.StreamHandler() if not to_file else logging.FileHandler(to_file, mode='w') - ch.setLevel(logging.DEBUG) - fmt = logging.Formatter(LOGGING_FMT) - ch.setFormatter(fmt) - logger.addHandler(ch) - remove_other_handlers(ch) - -logger = logging.getLogger('pyt') -remove_other_handlers() diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index d3f7e629..58be0591 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -4,9 +4,9 @@ import json from collections import namedtuple +from .analysis.definition_chains import build_def_use_chain +from .analysis.lattice import Lattice from .argument_helpers import UImode -from .definition_chains import build_def_use_chain -from .lattice import Lattice from .node_types import ( AssignmentNode, BBorBInode, diff --git a/tests/liveness_test.py b/tests/analysis/__init__.py similarity index 100% rename from tests/liveness_test.py rename to tests/analysis/__init__.py diff --git a/tests/analysis_base_test_case.py b/tests/analysis/analysis_base_test_case.py similarity index 90% rename from tests/analysis_base_test_case.py rename to tests/analysis/analysis_base_test_case.py index 
9bf5874b..37d8e6f6 100644 --- a/tests/analysis_base_test_case.py +++ b/tests/analysis/analysis_base_test_case.py @@ -1,9 +1,9 @@ from collections import namedtuple -from .base_test_case import BaseTestCase -from pyt.constraint_table import initialize_constraint_table -from pyt.fixed_point import FixedPointAnalysis -from pyt.lattice import Lattice +from ..base_test_case import BaseTestCase +from pyt.analysis.constraint_table import initialize_constraint_table +from pyt.analysis.fixed_point import FixedPointAnalysis +from pyt.analysis.lattice import Lattice class AnalysisBaseTestCase(BaseTestCase): diff --git a/tests/reaching_definitions_taint_test.py b/tests/analysis/reaching_definitions_taint_test.py similarity index 100% rename from tests/reaching_definitions_taint_test.py rename to tests/analysis/reaching_definitions_taint_test.py diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index a9e1b02c..30316c72 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -7,8 +7,8 @@ UImode, VulnerabilityFiles ) -from pyt.constraint_table import initialize_constraint_table -from pyt.fixed_point import analyse +from pyt.analysis.constraint_table import initialize_constraint_table +from pyt.analysis.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor from pyt.framework_helper import is_flask_route_function from pyt.project_handler import get_directory_modules, get_modules diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 50470a14..d578866d 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -6,14 +6,14 @@ trigger_definitions_parser, vulnerabilities ) +from pyt.analysis.constraint_table import initialize_constraint_table +from pyt.analysis.fixed_point import analyse from pyt.argument_helpers import ( default_blackbox_mapping_file, default_trigger_word_file, UImode, VulnerabilityFiles ) -from 
pyt.constraint_table import initialize_constraint_table -from pyt.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor from pyt.framework_helper import ( is_django_view_function, From 078d171b41438341106d3a7990aa738d4868f932 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 21 Apr 2018 13:32:56 -0700 Subject: [PATCH 023/291] Moved cfg modules into their own submodule --- pyt/__main__.py | 2 +- pyt/cfg/__init__.py | 38 +++++++++++++++++++ pyt/{ => cfg}/expr_visitor.py | 32 +++------------- pyt/{ => cfg}/expr_visitor_helper.py | 20 +--------- pyt/{ => cfg}/stmt_visitor.py | 16 ++++---- pyt/{ => cfg}/stmt_visitor_helper.py | 2 +- pyt/framework_adaptor.py | 2 +- pyt/github_search.py | 2 +- tests/base_test_case.py | 57 +++++++++++++++++++--------- 9 files changed, 96 insertions(+), 75 deletions(-) create mode 100644 pyt/cfg/__init__.py rename pyt/{ => cfg}/expr_visitor.py (97%) rename pyt/{ => cfg}/expr_visitor_helper.py (56%) rename pyt/{ => cfg}/stmt_visitor.py (99%) rename pyt/{ => cfg}/stmt_visitor_helper.py (99%) diff --git a/pyt/__main__.py b/pyt/__main__.py index 1378b9ea..523809aa 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -16,7 +16,7 @@ ) from .ast_helper import generate_ast from .baseline import get_vulnerabilities_not_in_baseline -from .expr_visitor import make_cfg +from .cfg import make_cfg from .formatters import ( json, text diff --git a/pyt/cfg/__init__.py b/pyt/cfg/__init__.py new file mode 100644 index 00000000..710c5f66 --- /dev/null +++ b/pyt/cfg/__init__.py @@ -0,0 +1,38 @@ +from .expr_visitor import ExprVisitor + + +class CFG(): + def __init__(self, nodes, blackbox_assignments): + self.nodes = nodes + self.blackbox_assignments = blackbox_assignments + + def __repr__(self): + output = '' + for x, n in enumerate(self.nodes): + output = ''.join((output, 'Node: ' + str(x) + ' ' + repr(n), '\n\n')) + return output + + def __str__(self): + output = '' + for x, n in enumerate(self.nodes): + output = ''.join((output, 
'Node: ' + str(x) + ' ' + str(n), '\n\n')) + return output + + +def make_cfg( + node, + project_modules, + local_modules, + filename, + module_definitions=None +): + visitor = ExprVisitor( + node, + project_modules, + local_modules, filename, + module_definitions + ) + return CFG( + visitor.nodes, + visitor.blackbox_assignments + ) diff --git a/pyt/expr_visitor.py b/pyt/cfg/expr_visitor.py similarity index 97% rename from pyt/expr_visitor.py rename to pyt/cfg/expr_visitor.py index 410d3b97..597f21f8 100644 --- a/pyt/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -1,21 +1,20 @@ import ast -from .alias_helper import ( +from ..alias_helper import ( handle_aliases_in_calls ) -from .ast_helper import ( +from ..ast_helper import ( Arguments, get_call_names_as_string ) from .expr_visitor_helper import ( BUILTINS, - CFG, return_connection_handler, SavedVariable ) -from .label_visitor import LabelVisitor -from .module_definitions import ModuleDefinitions -from .node_types import ( +from ..label_visitor import LabelVisitor +from ..module_definitions import ModuleDefinitions +from ..node_types import ( AssignmentCallNode, AssignmentNode, BBorBInode, @@ -26,7 +25,7 @@ RestoreNode, ReturnNode ) -from .right_hand_side_visitor import RHSVisitor +from ..right_hand_side_visitor import RHSVisitor from .stmt_visitor import StmtVisitor from .stmt_visitor_helper import CALL_IDENTIFIER @@ -564,22 +563,3 @@ def visit_Call(self, node): # Mark the call as a blackbox because we don't have the definition return self.add_blackbox_or_builtin_call(node, blackbox=True) return self.add_blackbox_or_builtin_call(node, blackbox=False) - - -def make_cfg( - node, - project_modules, - local_modules, - filename, - module_definitions=None -): - visitor = ExprVisitor( - node, - project_modules, - local_modules, filename, - module_definitions - ) - return CFG( - visitor.nodes, - visitor.blackbox_assignments - ) diff --git a/pyt/expr_visitor_helper.py b/pyt/cfg/expr_visitor_helper.py similarity index 56% 
rename from pyt/expr_visitor_helper.py rename to pyt/cfg/expr_visitor_helper.py index aebeccd3..27e1df78 100644 --- a/pyt/expr_visitor_helper.py +++ b/pyt/cfg/expr_visitor_helper.py @@ -1,6 +1,6 @@ from collections import namedtuple -from .node_types import ConnectToExitNode +from ..node_types import ConnectToExitNode SavedVariable = namedtuple( @@ -33,24 +33,6 @@ ) -class CFG(): - def __init__(self, nodes, blackbox_assignments): - self.nodes = nodes - self.blackbox_assignments = blackbox_assignments - - def __repr__(self): - output = '' - for x, n in enumerate(self.nodes): - output = ''.join((output, 'Node: ' + str(x) + ' ' + repr(n), '\n\n')) - return output - - def __str__(self): - output = '' - for x, n in enumerate(self.nodes): - output = ''.join((output, 'Node: ' + str(x) + ' ' + str(n), '\n\n')) - return output - - def return_connection_handler(nodes, exit_node): """Connect all return statements to the Exit node.""" for function_body_node in nodes: diff --git a/pyt/stmt_visitor.py b/pyt/cfg/stmt_visitor.py similarity index 99% rename from pyt/stmt_visitor.py rename to pyt/cfg/stmt_visitor.py index c855c9bb..3df1e275 100644 --- a/pyt/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -2,24 +2,24 @@ import itertools import os.path -from .alias_helper import ( +from ..alias_helper import ( as_alias_handler, handle_aliases_in_init_files, handle_fdid_aliases, not_as_alias_handler, retrieve_import_alias_mapping ) -from .ast_helper import ( +from ..ast_helper import ( generate_ast, get_call_names_as_string ) -from .label_visitor import LabelVisitor -from .module_definitions import ( +from ..label_visitor import LabelVisitor +from ..module_definitions import ( LocalModuleDefinition, ModuleDefinition, ModuleDefinitions ) -from .node_types import ( +from ..node_types import ( AssignmentNode, AssignmentCallNode, BBorBInode, @@ -33,8 +33,8 @@ ReturnNode, TryNode ) -from .project_handler import get_directory_modules -from .right_hand_side_visitor import RHSVisitor +from 
..project_handler import get_directory_modules +from ..right_hand_side_visitor import RHSVisitor from .stmt_visitor_helper import ( CALL_IDENTIFIER, ConnectStatements, @@ -45,7 +45,7 @@ get_last_statements, remove_breaks ) -from .vars_visitor import VarsVisitor +from ..vars_visitor import VarsVisitor class StmtVisitor(ast.NodeVisitor): diff --git a/pyt/stmt_visitor_helper.py b/pyt/cfg/stmt_visitor_helper.py similarity index 99% rename from pyt/stmt_visitor_helper.py rename to pyt/cfg/stmt_visitor_helper.py index 315c3332..a52f22b6 100644 --- a/pyt/stmt_visitor_helper.py +++ b/pyt/cfg/stmt_visitor_helper.py @@ -2,7 +2,7 @@ import random from collections import namedtuple -from .node_types import ( +from ..node_types import ( AssignmentCallNode, BBorBInode, BreakNode, diff --git a/pyt/framework_adaptor.py b/pyt/framework_adaptor.py index c2a49ef9..00f5a07b 100644 --- a/pyt/framework_adaptor.py +++ b/pyt/framework_adaptor.py @@ -3,7 +3,7 @@ import ast from .ast_helper import Arguments -from .expr_visitor import make_cfg +from .cfg import make_cfg from .module_definitions import project_definitions from .node_types import ( AssignmentNode, diff --git a/pyt/github_search.py b/pyt/github_search.py index 9e11f219..e58a5ee3 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -17,7 +17,7 @@ from .analysis.fixed_point import analyse from .argument_helpers import VulnerabilityFiles from .ast_helper import generate_ast -from .expr_visitor import make_cfg +from .cfg import make_cfg from .formatters import ( json, text diff --git a/tests/base_test_case.py b/tests/base_test_case.py index bbcb9d52..72fa0284 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -2,7 +2,7 @@ import unittest from pyt.ast_helper import generate_ast -from pyt.expr_visitor import make_cfg +from pyt.cfg import make_cfg from pyt.module_definitions import project_definitions @@ -27,30 +27,49 @@ def assertInCfg(self, connections): for element in range(nodes): for sets in 
range(nodes): if not (element, sets) in connections: - self.assertNotIn(self.cfg.nodes[element], self.cfg.nodes[sets].outgoing, "(%s <- %s)" % (element, sets) + " expected to be disconnected") - self.assertNotIn(self.cfg.nodes[sets], self.cfg.nodes[element].ingoing, "(%s <- %s)" % (sets, element) + " expected to be disconnected") + self.assertNotIn( + self.cfg.nodes[element], + self.cfg.nodes[sets].outgoing, + "(%s <- %s)" % (element, sets) + " expected to be disconnected" + ) + self.assertNotIn( + self.cfg.nodes[sets], + self.cfg.nodes[element].ingoing, + "(%s <- %s)" % (sets, element) + " expected to be disconnected" + ) def assertConnected(self, node, successor): """Asserts that a node is connected to its successor. This means that node has successor in its outgoing and successor has node in its ingoing.""" - self.assertIn(successor, node.outgoing, - '\n%s was NOT found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']') + self.assertIn( + successor, + node.outgoing, + '\n%s was NOT found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']' + ) - self.assertIn(node, successor.ingoing, - '\n%s was NOT found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']') + self.assertIn( + node, + successor.ingoing, + '\n%s was NOT found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']' + ) def assertNotConnected(self, node, successor): """Asserts that a node is not connected to its successor. 
This means that node does not the successor in its outgoing and successor does not have the node in its ingoing.""" - self.assertNotIn(successor, node.outgoing, - '\n%s was mistakenly found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']') - - self.assertNotIn(node, successor.ingoing, - '\n%s was mistakenly found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']') + self.assertNotIn( + successor, + node.outgoing, + '\n%s was mistakenly found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']' + ) + self.assertNotIn( + node, + successor.ingoing, + '\n%s was mistakenly found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']' + ) def assertLineNumber(self, node, line_number): self.assertEqual(node.line_number, line_number) @@ -70,11 +89,13 @@ def cfg_create_from_file(self, filename, project_modules=list(), local_modules=l self.cfg = make_cfg(tree, project_modules, local_modules, filename) def string_compare_alpha(self, output, expected_string): - return [char for char in output if char.isalpha()] \ - == \ - [char for char in expected_string if char.isalpha()] + return ( + [char for char in output if char.isalpha()] == + [char for char in expected_string if char.isalpha()] + ) def string_compare_alnum(self, output, expected_string): - return [char for char in output if char.isalnum()] \ - == \ - [char for char in expected_string if char.isalnum()] + return ( + [char for char in output if char.isalnum()] == + [char for char in expected_string if char.isalnum()] + ) From 6bd97ba0f1c7257bb0e91fb7d494860b33e9f879 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 21 Apr 2018 14:25:07 -0700 Subject: [PATCH 024/291] Move vulnerability files into their 
own submodule --- pyt/{ => cfg}/alias_helper.py | 1 + pyt/cfg/expr_visitor.py | 2 +- pyt/cfg/stmt_visitor.py | 2 +- pyt/vulnerabilities/__init__.py | 40 ++++++++++++++++ .../trigger_definitions_parser.py | 1 - pyt/{ => vulnerabilities}/vulnerabilities.py | 48 ++----------------- .../vulnerability_helper.py | 0 tests/base_test_case.py | 26 ++++++++-- tests/vulnerabilities_test.py | 11 +++-- 9 files changed, 76 insertions(+), 55 deletions(-) rename pyt/{ => cfg}/alias_helper.py (99%) create mode 100644 pyt/vulnerabilities/__init__.py rename pyt/{ => vulnerabilities}/trigger_definitions_parser.py (99%) rename pyt/{ => vulnerabilities}/vulnerabilities.py (91%) rename pyt/{ => vulnerabilities}/vulnerability_helper.py (100%) diff --git a/pyt/alias_helper.py b/pyt/cfg/alias_helper.py similarity index 99% rename from pyt/alias_helper.py rename to pyt/cfg/alias_helper.py index 9d294444..a1c83ab0 100644 --- a/pyt/alias_helper.py +++ b/pyt/cfg/alias_helper.py @@ -1,5 +1,6 @@ """This module contains alias helper functions for the expr_visitor module.""" + def as_alias_handler(alias_list): """Returns a list of all the names that will be called.""" list_ = list() diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index 597f21f8..2301fce6 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -1,6 +1,6 @@ import ast -from ..alias_helper import ( +from .alias_helper import ( handle_aliases_in_calls ) from ..ast_helper import ( diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 3df1e275..85ed523c 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -2,7 +2,7 @@ import itertools import os.path -from ..alias_helper import ( +from .alias_helper import ( as_alias_handler, handle_aliases_in_init_files, handle_fdid_aliases, diff --git a/pyt/vulnerabilities/__init__.py b/pyt/vulnerabilities/__init__.py new file mode 100644 index 00000000..07224a49 --- /dev/null +++ b/pyt/vulnerabilities/__init__.py @@ -0,0 +1,40 @@ +import json 
+ +from ..analysis.lattice import Lattice +from .trigger_definitions_parser import parse +from .vulnerabilities import find_vulnerabilities_in_cfg + + +def find_vulnerabilities( + cfg_list, + ui_mode, + vulnerability_files +): + """Find vulnerabilities in a list of CFGs from a trigger_word_file. + + Args: + cfg_list(list[CFG]): the list of CFGs to scan. + ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. + vulnerability_files(VulnerabilityFiles): contains trigger words and blackbox_mapping files + + Returns: + A list of vulnerabilities. + """ + vulnerabilities = list() + definitions = parse(vulnerability_files.triggers) + + with open(vulnerability_files.blackbox_mapping) as infile: + blackbox_mapping = json.load(infile) + for cfg in cfg_list: + find_vulnerabilities_in_cfg( + cfg, + definitions, + Lattice(cfg.nodes), + ui_mode, + blackbox_mapping, + vulnerabilities + ) + with open(vulnerability_files.blackbox_mapping, 'w') as outfile: + json.dump(blackbox_mapping, outfile, indent=4) + + return vulnerabilities diff --git a/pyt/trigger_definitions_parser.py b/pyt/vulnerabilities/trigger_definitions_parser.py similarity index 99% rename from pyt/trigger_definitions_parser.py rename to pyt/vulnerabilities/trigger_definitions_parser.py index 7515da4a..62cdbce0 100644 --- a/pyt/trigger_definitions_parser.py +++ b/pyt/vulnerabilities/trigger_definitions_parser.py @@ -1,4 +1,3 @@ -import os from collections import namedtuple diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py similarity index 91% rename from pyt/vulnerabilities.py rename to pyt/vulnerabilities/vulnerabilities.py index 58be0591..fafa212c 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -1,21 +1,18 @@ """Module for finding vulnerabilities based on a definitions file.""" import ast -import json from collections import namedtuple -from .analysis.definition_chains import build_def_use_chain -from 
.analysis.lattice import Lattice -from .argument_helpers import UImode -from .node_types import ( +from ..analysis.definition_chains import build_def_use_chain +from ..argument_helpers import UImode +from ..node_types import ( AssignmentNode, BBorBInode, IfNode, TaintedNode ) -from .right_hand_side_visitor import RHSVisitor -from .trigger_definitions_parser import parse -from .vars_visitor import VarsVisitor +from ..right_hand_side_visitor import RHSVisitor +from ..vars_visitor import VarsVisitor from .vulnerability_helper import ( vuln_factory, VulnerabilityType @@ -500,38 +497,3 @@ def find_vulnerabilities_in_cfg( ) if vulnerability: vulnerabilities_list.append(vulnerability) - - -def find_vulnerabilities( - cfg_list, - ui_mode, - vulnerability_files -): - """Find vulnerabilities in a list of CFGs from a trigger_word_file. - - Args: - cfg_list(list[CFG]): the list of CFGs to scan. - ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. - vulnerability_files(VulnerabilityFiles): contains trigger words and blackbox_mapping files - - Returns: - A list of vulnerabilities. 
- """ - vulnerabilities = list() - definitions = parse(vulnerability_files.triggers) - - with open(vulnerability_files.blackbox_mapping) as infile: - blackbox_mapping = json.load(infile) - for cfg in cfg_list: - find_vulnerabilities_in_cfg( - cfg, - definitions, - Lattice(cfg.nodes), - ui_mode, - blackbox_mapping, - vulnerabilities - ) - with open(vulnerability_files.blackbox_mapping, 'w') as outfile: - json.dump(blackbox_mapping, outfile, indent=4) - - return vulnerabilities diff --git a/pyt/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py similarity index 100% rename from pyt/vulnerability_helper.py rename to pyt/vulnerabilities/vulnerability_helper.py diff --git a/tests/base_test_case.py b/tests/base_test_case.py index 72fa0284..2683aae1 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -19,8 +19,16 @@ def assertInCfg(self, connections): at index 1 of the tuple. """ for connection in connections: - self.assertIn(self.cfg.nodes[connection[0]], self.cfg.nodes[connection[1]].outgoing, str(connection) + " expected to be connected") - self.assertIn(self.cfg.nodes[connection[1]], self.cfg.nodes[connection[0]].ingoing, str(connection) + " expected to be connected") + self.assertIn( + self.cfg.nodes[connection[0]], + self.cfg.nodes[connection[1]].outgoing, + str(connection) + " expected to be connected" + ) + self.assertIn( + self.cfg.nodes[connection[1]], + self.cfg.nodes[connection[0]].ingoing, + str(connection) + " expected to be connected" + ) nodes = len(self.cfg.nodes) @@ -83,10 +91,20 @@ def assert_length(self, _list, *, expected_length): actual_length = len(_list) self.assertEqual(expected_length, actual_length) - def cfg_create_from_file(self, filename, project_modules=list(), local_modules=list()): + def cfg_create_from_file( + self, + filename, + project_modules=list(), + local_modules=list() + ): project_definitions.clear() tree = generate_ast(filename) - self.cfg = make_cfg(tree, project_modules, local_modules, 
filename) + self.cfg = make_cfg( + tree, + project_modules, + local_modules, + filename + ) def string_compare_alpha(self, output, expected_string): return ( diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index d578866d..202f4ee4 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -2,7 +2,8 @@ from .base_test_case import BaseTestCase -from pyt import ( +from pyt.vulnerabilities import ( + find_vulnerabilities, trigger_definitions_parser, vulnerabilities ) @@ -32,7 +33,7 @@ def get_lattice_elements(self, cfg_nodes): return cfg_nodes def test_parse(self): - definitions = vulnerabilities.parse( + definitions = trigger_definitions_parser.parse( trigger_word_file=os.path.join( os.getcwd(), 'pyt', @@ -130,7 +131,7 @@ def run_analysis(self, path): analyse(cfg_list) - return vulnerabilities.find_vulnerabilities( + return find_vulnerabilities( cfg_list, UImode.NORMAL, VulnerabilityFiles( @@ -524,7 +525,7 @@ def run_analysis(self, path): 'django_trigger_words.pyt' ) - return vulnerabilities.find_vulnerabilities( + return find_vulnerabilities( cfg_list, UImode.NORMAL, VulnerabilityFiles( @@ -571,7 +572,7 @@ def run_analysis(self, path): 'all_trigger_words.pyt' ) - return vulnerabilities.find_vulnerabilities( + return find_vulnerabilities( cfg_list, UImode.NORMAL, VulnerabilityFiles( From 4cd3ea596e2ef0c834e206938af6963bbd08130d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 22 Apr 2018 13:51:49 +0300 Subject: [PATCH 025/291] nosec_lines for #108's issue Added args and nosec_lines --- pyt/__main__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pyt/__main__.py b/pyt/__main__.py index 4ceb67d1..fd2ba223 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -142,6 +142,9 @@ def parse_args(args): '(only JSON-formatted files are accepted)', type=str, default=False) + parser.add_argument('-in', '--ignore-nosec', + help='Ignoring nosec commands', + 
action='http://github.com/store_true') save_parser = subparsers.add_parser('save', help='Save menu.') save_parser.set_defaults(which='save') @@ -307,6 +310,17 @@ def main(command_line_args=sys.argv[1:]): args.trigger_word_file ) ) + + if args.ignore_nosec: + nosec_lines = set() + else: + file = open(path, "r") + lines = file.readlines() + nosec_lines = set( + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' in line or '# nosec' in line) + if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline(vulnerabilities, args.baseline) From 1316bc079c7a28afbc48c5883d43b29a53352bb0 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sun, 22 Apr 2018 23:38:52 -0700 Subject: [PATCH 026/291] Move argparse code into usage.py, delete python 2 option, still need to fix https://stackoverflow.com/questions/24180527/argparse-required-arguments-listed-under-optional-arguments --- pyt/__main__.py | 116 +---------------- pyt/argument_helpers.py | 26 ---- pyt/ast_helper.py | 11 +- pyt/baseline.py | 8 +- pyt/module_definitions.py | 3 +- pyt/usage.py | 142 +++++++++++++++++++++ tests/vulnerabilities_across_files_test.py | 6 +- tests/vulnerabilities_test.py | 7 +- 8 files changed, 161 insertions(+), 158 deletions(-) create mode 100644 pyt/usage.py diff --git a/pyt/__main__.py b/pyt/__main__.py index 523809aa..06dc9f1e 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -1,16 +1,11 @@ """The comand line module of PyT.""" -import argparse import os import sys -from datetime import date from .analysis.constraint_table import initialize_constraint_table from .analysis.fixed_point import analyse from .argument_helpers import ( - default_blackbox_mapping_file, - default_trigger_word_file, - valid_date, VulnerabilityFiles, UImode ) @@ -37,117 +32,10 @@ get_directory_modules, get_modules ) +from .usage import parse_args from .vulnerabilities import find_vulnerabilities -def parse_args(args): - parser = argparse.ArgumentParser(prog='python -m pyt') - 
parser.set_defaults(which='') - - subparsers = parser.add_subparsers() - - entry_group = parser.add_mutually_exclusive_group(required=True) - entry_group.add_argument( - '-f', '--filepath', - help='Path to the file that should be analysed.', - type=str - ) - entry_group.add_argument( - '-gr', '--git-repos', - help='Takes a CSV file of git_url, path per entry.', - type=str - ) - - parser.add_argument( - '-pr', '--project-root', - help='Add project root, this is important when the entry ' - 'file is not at the root of the project.', - type=str - ) - parser.add_argument( - '-csv', '--csv-path', type=str, - help='Give the path of the csv file' - ' repos should be added to.' - ) - parser.add_argument( - '-t', '--trigger-word-file', - help='Input trigger word file.', - type=str, - default=default_trigger_word_file - ) - parser.add_argument( - '-m', '--blackbox-mapping-file', - help='Input blackbox mapping file.', - type=str, - default=default_blackbox_mapping_file - ) - parser.add_argument( - '-py2', '--python-2', - help='[WARNING, EXPERIMENTAL] Turns on Python 2 mode,' + - ' needed when target file(s) are written in Python 2.', - action='http://github.com/store_true' - ) - parser.add_argument( - '-l', '--log-level', - help='Choose logging level: CRITICAL, ERROR,' - ' WARNING(Default), INFO, DEBUG, NOTSET.', - type=str - ) - parser.add_argument( - '-a', '--adaptor', - help='Choose an adaptor: Flask(Default), Django, Every or Pylons', - type=str - ) - parser.add_argument( - '-j', '--json', - help='Prints JSON instead of report.', - action='http://github.com/store_true', - default=False - ) - parser.add_argument( - '-b', '--baseline', - help='path of a baseline report to compare against ' - '(only JSON-formatted files are accepted)', - type=str, - default=False - ) - - print_group = parser.add_mutually_exclusive_group() - print_group.add_argument( - '-trim', '--trim-reassigned-in', - help='Trims the reassigned list to the vulnerability chain.', - 
action='http://github.com/store_true', - default=False - ) - print_group.add_argument( - '-i', '--interactive', - help='Will ask you about each vulnerability chain and blackbox nodes.', - action='http://github.com/store_true', - default=False - ) - - search_parser = subparsers.add_parser( - 'github_search', - help='Searches through github and runs PyT ' - 'on found repositories. This can take some time.' - ) - search_parser.set_defaults(which='search') - search_parser.add_argument( - '-ss', '--search-string', required=True, - help='String for searching for repos on github.', - type=str - ) - search_parser.add_argument( - '-sd', '--start-date', - help='Start date for repo search. ' - 'Criteria used is Created Date.', - type=valid_date, - default=date(2010, 1, 1) - ) - - return parser.parse_args(args) - - def main(command_line_args=sys.argv[1:]): args = parse_args(command_line_args) @@ -181,7 +69,7 @@ def main(command_line_args=sys.argv[1:]): project_modules = get_modules(directory) local_modules = get_directory_modules(directory) - tree = generate_ast(path, python_2=args.python_2) + tree = generate_ast(path) cfg = make_cfg( tree, diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index 847636f9..bd076775 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -1,33 +1,7 @@ -import os -from argparse import ArgumentTypeError from collections import namedtuple -from datetime import datetime from enum import Enum -default_blackbox_mapping_file = os.path.join( - os.path.dirname(__file__), - 'vulnerability_definitions', - 'blackbox_mapping.json' -) - - -default_trigger_word_file = os.path.join( - os.path.dirname(__file__), - 'vulnerability_definitions', - 'all_trigger_words.pyt' -) - - -def valid_date(s): - date_format = "%Y-%m-%d" - try: - return datetime.strptime(s, date_format).date() - except ValueError: - msg = "Not a valid date: '{0}'. 
Format: {1}".format(s, date_format) - raise ArgumentTypeError(msg) - - class UImode(Enum): INTERACTIVE = 0 NORMAL = 1 diff --git a/pyt/ast_helper.py b/pyt/ast_helper.py index 985eee70..e741ac50 100644 --- a/pyt/ast_helper.py +++ b/pyt/ast_helper.py @@ -8,7 +8,6 @@ BLACK_LISTED_CALL_NAMES = ['self'] recursive = False -python_2_mode = False def convert_to_3(path): # pragma: no cover @@ -22,17 +21,12 @@ def convert_to_3(path): # pragma: no cover exit(1) -def generate_ast(path, python_2=False): +def generate_ast(path): """Generate an Abstract Syntax Tree using the ast module. Args: path(str): The path to the file e.g. example/foo/bar.py - python_2(bool): Determines whether or not to call 2to3. """ - # If set, it stays set. - global python_2_mode - if python_2: # pragma: no cover - python_2_mode = True if os.path.isfile(path): with open(path, 'r') as f: try: @@ -40,8 +34,7 @@ def generate_ast(path, python_2=False): except SyntaxError: # pragma: no cover global recursive if not recursive: - if not python_2_mode: - convert_to_3(path) + convert_to_3(path) recursive = True return generate_ast(path) else: diff --git a/pyt/baseline.py b/pyt/baseline.py index 1e3258a0..1dc3128b 100644 --- a/pyt/baseline.py +++ b/pyt/baseline.py @@ -1,10 +1,12 @@ import json -def get_vulnerabilities_not_in_baseline(vulnerabilities, baseline): - baseline = json.load(open(baseline)) +def get_vulnerabilities_not_in_baseline( + vulnerabilities, + baseline_file +): + baseline = json.load(open(baseline_file)) output = list() - vulnerabilities =[vuln for vuln in vulnerabilities] for vuln in vulnerabilities: if vuln.as_dict() not in baseline['vulnerabilities']: output.append(vuln) diff --git a/pyt/module_definitions.py b/pyt/module_definitions.py index bde14ce7..6ec197de 100644 --- a/pyt/module_definitions.py +++ b/pyt/module_definitions.py @@ -4,7 +4,8 @@ import ast -# Contains all project definitions for a program run: +# Contains all project definitions for a program run +# Only used in 
framework_adaptor.py, but modified here project_definitions = dict() diff --git a/pyt/usage.py b/pyt/usage.py new file mode 100644 index 00000000..1dc23c07 --- /dev/null +++ b/pyt/usage.py @@ -0,0 +1,142 @@ +import argparse +import os +from datetime import ( + date, + datetime +) + + +default_blackbox_mapping_file = os.path.join( + os.path.dirname(__file__), + 'vulnerability_definitions', + 'blackbox_mapping.json' +) + + +default_trigger_word_file = os.path.join( + os.path.dirname(__file__), + 'vulnerability_definitions', + 'all_trigger_words.pyt' +) + + +def valid_date(s): + date_format = "%Y-%m-%d" + try: + return datetime.strptime(s, date_format).date() + except ValueError: + msg = "Not a valid date: '{0}'. Format: {1}".format(s, date_format) + raise argparse.ArgumentTypeError(msg) + + +def _add_entry_group(parser): + entry_group = parser.add_mutually_exclusive_group(required=True) + entry_group.add_argument( + '-f', '--filepath', + help='Path to the file that should be analysed.', + type=str + ) + entry_group.add_argument( + '-gr', '--git-repos', + help='Takes a CSV file of git_url, path per entry.', + type=str, + metavar='CSV_FILE' + ) + + +def _add_regular_arguments(parser): + parser.add_argument( + '-r', '--root-directory', + help='Add project root, this is important when the entry ' + 'file is not at the root of the project.', + type=str, + metavar='DIR_TO_ANALYZE' + ) + parser.add_argument( + '-a', '--adaptor', + help='Choose a web framework adaptor: ' + 'Flask(Default), Django, Every or Pylons', + type=str + ) + parser.add_argument( + '-b', '--baseline', + help='Path of a baseline report to compare against ' + '(only JSON-formatted files are accepted)', + type=str, + default=False, + metavar='BASELINE_JSON_FILE', + ) + parser.add_argument( + '-j', '--json', + help='Prints JSON instead of report.', + action='http://github.com/store_true', + default=False + ) + parser.add_argument( + '-m', '--blackbox-mapping-file', + help='Input blackbox mapping 
file.', + type=str, + default=default_blackbox_mapping_file + ) + parser.add_argument( + '-t', '--trigger-word-file', + help='Input trigger word file.', + type=str, + default=default_trigger_word_file + ) + parser.add_argument( + '-csv', '--csv-path', + help='Give the path of the csv file ' + 'repos should be added to.', + type=str + ) + + +def _add_print_group(parser): + print_group = parser.add_mutually_exclusive_group() + print_group.add_argument( + '-trim', '--trim-reassigned-in', + help='Trims the reassigned list to the vulnerability chain.', + action='http://github.com/store_true', + default=True + ) + print_group.add_argument( + '--interactive', + help='Will ask you about each vulnerability chain and blackbox nodes.', + action='http://github.com/store_true', + default=False + ) + + +def _add_search_parser(parser): + subparsers = parser.add_subparsers() + search_parser = subparsers.add_parser( + 'github_search', + help='Searches through github and runs PyT ' + 'on found repositories. This can take some time.' + ) + search_parser.set_defaults(which='search') + search_parser.add_argument( + '-ss', '--search-string', required=True, + help='String for searching for repos on github.', + type=str + ) + search_parser.add_argument( + '-sd', '--start-date', + help='Start date for repo search. 
' + 'Criteria used is Created Date.', + type=valid_date, + default=date(2010, 1, 1) + ) + + +def parse_args(args): + parser = argparse.ArgumentParser(prog='python -m pyt') + parser.set_defaults(which='') + + _add_entry_group(parser) + _add_regular_arguments(parser) + _add_print_group(parser) + _add_search_parser(parser) + + return parser.parse_args(args) diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index 30316c72..ef6cceb5 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -2,8 +2,6 @@ from .base_test_case import BaseTestCase from pyt.argument_helpers import ( - default_blackbox_mapping_file, - default_trigger_word_file, UImode, VulnerabilityFiles ) @@ -12,6 +10,10 @@ from pyt.framework_adaptor import FrameworkAdaptor from pyt.framework_helper import is_flask_route_function from pyt.project_handler import get_directory_modules, get_modules +from pyt.usage import ( + default_blackbox_mapping_file, + default_trigger_word_file +) from pyt.vulnerabilities import find_vulnerabilities diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 202f4ee4..c67b36c1 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -10,8 +10,6 @@ from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse from pyt.argument_helpers import ( - default_blackbox_mapping_file, - default_trigger_word_file, UImode, VulnerabilityFiles ) @@ -22,7 +20,10 @@ is_function ) from pyt.node_types import Node - +from pyt.usage import ( + default_blackbox_mapping_file, + default_trigger_word_file +) class EngineTest(BaseTestCase): def run_empty(self): From ae9e8e94de6f567aeef8beffcc5efb2e5bfdb136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Mon, 23 Apr 2018 12:33:58 +0300 Subject: [PATCH 027/291] nosec_lines --- pyt/__main__.py | 16 ++++++++++++++-- 1 file changed, 
14 insertions(+), 2 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index fd2ba223..9b197906 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -301,6 +301,7 @@ def main(command_line_args=sys.argv[1:]): analyse(cfg_list, analysis_type=analysis) + nosec_lines = set() vulnerabilities = find_vulnerabilities( cfg_list, analysis, @@ -308,7 +309,8 @@ def main(command_line_args=sys.argv[1:]): VulnerabilityFiles( args.blackbox_mapping_file, args.trigger_word_file - ) + ), + nosec_lines ) if args.ignore_nosec: @@ -319,7 +321,17 @@ def main(command_line_args=sys.argv[1:]): nosec_lines = set( lineno for (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) + if '#nosec' in line or '# nosec' in line) + vulnerabilities = find_vulnerabilities( + cfg_list, + analysis, + ui_mode, + VulnerabilityFiles( + args.blackbox_mapping_file, + args.trigger_word_file + ), + nosec_lines + ) if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline(vulnerabilities, args.baseline) From 1735db7be5d600fdc85af9d08bc5b03cb1edac17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Mon, 23 Apr 2018 12:37:20 +0300 Subject: [PATCH 028/291] added nosec_lines parameter --- pyt/vulnerabilities.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index ba7debff..8d76c040 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -73,7 +73,8 @@ def identify_triggers( cfg, sources, sinks, - lattice + lattice, + nosec_lines ): """Identify sources, sinks and sanitisers in a CFG. 
@@ -89,12 +90,12 @@ def identify_triggers( tainted_nodes = filter_cfg_nodes(cfg, TaintedNode) tainted_trigger_nodes = [TriggerNode('Framework function URL parameter', None, node) for node in tainted_nodes] - sources_in_file = find_triggers(assignment_nodes, sources) + sources_in_file = find_triggers(assignment_nodes, sources, nosec_lines) sources_in_file.extend(tainted_trigger_nodes) find_secondary_sources(assignment_nodes, sources_in_file, lattice) - sinks_in_file = find_triggers(cfg.nodes, sinks) + sinks_in_file = find_triggers(cfg.nodes, sinks, nosec_lines) sanitiser_node_dict = build_sanitiser_node_dict(cfg, sinks_in_file) @@ -170,7 +171,8 @@ def append_node_if_reassigned( def find_triggers( nodes, - trigger_words + trigger_words, + nosec_lines ): """Find triggers from the trigger_word_list in the nodes. @@ -183,7 +185,11 @@ def find_triggers( """ trigger_nodes = list() for node in nodes: - trigger_nodes.extend(iter(label_contains(node, trigger_words))) + if node.line_number not in nosec_lines: + trigger_nodes.extend(iter(label_contains(node, trigger_words))) + else: + pass + return trigger_nodes @@ -466,7 +472,8 @@ def find_vulnerabilities_in_cfg( lattice, ui_mode, blackbox_mapping, - vulnerabilities_list + vulnerabilities_list, + nosec_lines ): """Find vulnerabilities in a cfg. @@ -482,7 +489,8 @@ def find_vulnerabilities_in_cfg( cfg, definitions.sources, definitions.sinks, - lattice + lattice, + nosec_lines ) for sink in triggers.sinks: for source in triggers.sources: @@ -503,7 +511,8 @@ def find_vulnerabilities( cfg_list, analysis_type, ui_mode, - vulnerability_files + vulnerability_files, + nosec_lines ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. 
@@ -518,19 +527,19 @@ def find_vulnerabilities( """ vulnerabilities = list() definitions = parse(vulnerability_files.triggers) - with open(vulnerability_files.blackbox_mapping) as infile: blackbox_mapping = json.load(infile) for cfg in cfg_list: + find_vulnerabilities_in_cfg( cfg, definitions, Lattice(cfg.nodes, analysis_type), ui_mode, blackbox_mapping, - vulnerabilities + vulnerabilities, + nosec_lines ) with open(vulnerability_files.blackbox_mapping, 'w') as outfile: json.dump(blackbox_mapping, outfile, indent=4) - return vulnerabilities From fb88051e1d988d5890127ef3aa6867adf0db07de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Mon, 23 Apr 2018 12:38:13 +0300 Subject: [PATCH 029/291] Update vulnerabilities.py --- pyt/vulnerabilities.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 8d76c040..5e9bd675 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -189,7 +189,6 @@ def find_triggers( trigger_nodes.extend(iter(label_contains(node, trigger_words))) else: pass - return trigger_nodes From 079a235f9aec15886ab7f4c186a0e80c567b3c7c Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 23 Apr 2018 22:24:26 -0700 Subject: [PATCH 030/291] A great minimalist usage.py --- pyt/usage.py | 88 ++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 54 deletions(-) diff --git a/pyt/usage.py b/pyt/usage.py index 1dc23c07..17d4587b 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -29,36 +29,32 @@ def valid_date(s): raise argparse.ArgumentTypeError(msg) -def _add_entry_group(parser): - entry_group = parser.add_mutually_exclusive_group(required=True) - entry_group.add_argument( +def _add_required_group(parser): + required_group = parser.add_argument_group('required arguments') + required_group.add_argument( '-f', '--filepath', help='Path to the file that should be analysed.', type=str ) - entry_group.add_argument( - '-gr', '--git-repos', - help='Takes a 
CSV file of git_url, path per entry.', - type=str, - metavar='CSV_FILE' - ) - - -def _add_regular_arguments(parser): - parser.add_argument( + required_group.add_argument( '-r', '--root-directory', help='Add project root, this is important when the entry ' 'file is not at the root of the project.', type=str, metavar='DIR_TO_ANALYZE' ) - parser.add_argument( + + +def _add_optional_group(parser): + optional_group = parser.add_argument_group('optional arguments') + + optional_group.add_argument( '-a', '--adaptor', help='Choose a web framework adaptor: ' 'Flask(Default), Django, Every or Pylons', type=str ) - parser.add_argument( + optional_group.add_argument( '-b', '--baseline', help='Path of a baseline report to compare against ' '(only JSON-formatted files are accepted)', @@ -66,30 +62,24 @@ def _add_regular_arguments(parser): default=False, metavar='BASELINE_JSON_FILE', ) - parser.add_argument( + optional_group.add_argument( '-j', '--json', help='Prints JSON instead of report.', action='http://github.com/store_true', default=False ) - parser.add_argument( + optional_group.add_argument( '-m', '--blackbox-mapping-file', help='Input blackbox mapping file.', type=str, default=default_blackbox_mapping_file ) - parser.add_argument( + optional_group.add_argument( '-t', '--trigger-word-file', - help='Input trigger word file.', + help='Input file with a list of sources and sinks', type=str, default=default_trigger_word_file ) - parser.add_argument( - '-csv', '--csv-path', - help='Give the path of the csv file ' - 'repos should be added to.', - type=str - ) def _add_print_group(parser): @@ -101,42 +91,32 @@ def _add_print_group(parser): default=True ) print_group.add_argument( - '--interactive', + '-i', '--interactive', help='Will ask you about each vulnerability chain and blackbox nodes.', action='http://github.com/store_true', default=False ) - -def _add_search_parser(parser): - subparsers = parser.add_subparsers() - search_parser = subparsers.add_parser( 
- 'github_search', - help='Searches through github and runs PyT ' - 'on found repositories. This can take some time.' - ) - search_parser.set_defaults(which='search') - search_parser.add_argument( - '-ss', '--search-string', required=True, - help='String for searching for repos on github.', - type=str - ) - search_parser.add_argument( - '-sd', '--start-date', - help='Start date for repo search. ' - 'Criteria used is Created Date.', - type=valid_date, - default=date(2010, 1, 1) - ) - +def _check_required_and_mutually_exclusive_args(parser, args): + if args.filepath is None and args.git_repos is None: + parser.error('one of the arguments -f/--filepath -gr/--git-repos is required') + if args.filepath and args.git_repos: + parser.error('argument -f/--filepath: not allowed with argument -gr/--git-repos') + if args.trim_reassigned_in and args.interactive: + parser.error('argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in') def parse_args(args): + if len(args) == 0: + args.append('-h') parser = argparse.ArgumentParser(prog='python -m pyt') - parser.set_defaults(which='') - - _add_entry_group(parser) - _add_regular_arguments(parser) + parser._action_groups.pop() + _add_required_group(parser) + _add_optional_group(parser) _add_print_group(parser) - _add_search_parser(parser) - return parser.parse_args(args) + args = parser.parse_args(args) + _check_required_and_mutually_exclusive_args( + parser, + args + ) + return args From 867f344e326a1b0a6bafa777c10b39bbd16bd184 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 23 Apr 2018 23:06:14 -0700 Subject: [PATCH 031/291] Take code out of __init__ files, __all__ the things, delete more stuff --- .coveragerc | 3 +- pyt/__main__.py | 40 +-- pyt/argument_helpers.py | 17 -- pyt/cfg/__init__.py | 39 +-- pyt/cfg/make_cfg.py | 38 +++ pyt/github_search.py | 320 -------------------- pyt/repo_runner.py | 92 ------ pyt/vulnerabilities/__init__.py | 42 +-- pyt/vulnerabilities/vulnerabilities.py | 95 +++--- 
pyt/vulnerabilities/vulnerability_helper.py | 60 +++- tests/command_line_test.py | 34 --- tests/github_search_test.py | 21 -- tests/vulnerabilities_across_files_test.py | 16 +- tests/vulnerabilities_test.py | 34 +-- 14 files changed, 174 insertions(+), 677 deletions(-) delete mode 100644 pyt/argument_helpers.py create mode 100644 pyt/cfg/make_cfg.py delete mode 100644 pyt/github_search.py delete mode 100644 pyt/repo_runner.py delete mode 100644 tests/command_line_test.py delete mode 100644 tests/github_search_test.py diff --git a/.coveragerc b/.coveragerc index 48d1154f..2678c03a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -12,7 +12,6 @@ exclude_lines = source = ./pyt omit = pyt/__main__.py + pyt/usage.py pyt/formatters/json.py pyt/formatters/text.py - pyt/github_search.py - pyt/repo_runner.py diff --git a/pyt/__main__.py b/pyt/__main__.py index 06dc9f1e..b12bdf8b 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -5,10 +5,6 @@ from .analysis.constraint_table import initialize_constraint_table from .analysis.fixed_point import analyse -from .argument_helpers import ( - VulnerabilityFiles, - UImode -) from .ast_helper import generate_ast from .baseline import get_vulnerabilities_not_in_baseline from .cfg import make_cfg @@ -23,17 +19,15 @@ is_function, is_function_without_leading_ ) -from .github_search import ( - analyse_repos, - scan_github, - set_github_api_token -) from .project_handler import ( get_directory_modules, get_modules ) from .usage import parse_args -from .vulnerabilities import find_vulnerabilities +from .vulnerabilities import ( + find_vulnerabilities, + UImode +) def main(command_line_args=sys.argv[1:]): @@ -45,25 +39,9 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM - cfg_list = list() - if args.git_repos: - analyse_repos( - args, - ui_mode - ) - exit() - elif args.which == 'search': - set_github_api_token() - scan_github( - args, - ui_mode - ) - exit() - path = os.path.normpath(args.filepath) 
- - if args.project_root: - directory = os.path.normpath(args.project_root) + if args.root_directory: + directory = os.path.normpath(args.root_directory) else: directory = os.path.dirname(path) project_modules = get_modules(directory) @@ -99,10 +77,8 @@ def main(command_line_args=sys.argv[1:]): vulnerabilities = find_vulnerabilities( cfg_list, ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ) + args.blackbox_mapping_file, + args.trigger_word_file ) if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline( diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py deleted file mode 100644 index bd076775..00000000 --- a/pyt/argument_helpers.py +++ /dev/null @@ -1,17 +0,0 @@ -from collections import namedtuple -from enum import Enum - - -class UImode(Enum): - INTERACTIVE = 0 - NORMAL = 1 - TRIM = 2 - - -VulnerabilityFiles = namedtuple( - 'VulnerabilityFiles', - ( - 'blackbox_mapping', - 'triggers' - ) -) diff --git a/pyt/cfg/__init__.py b/pyt/cfg/__init__.py index 710c5f66..30037a72 100644 --- a/pyt/cfg/__init__.py +++ b/pyt/cfg/__init__.py @@ -1,38 +1,3 @@ -from .expr_visitor import ExprVisitor +from .make_cfg import make_cfg - -class CFG(): - def __init__(self, nodes, blackbox_assignments): - self.nodes = nodes - self.blackbox_assignments = blackbox_assignments - - def __repr__(self): - output = '' - for x, n in enumerate(self.nodes): - output = ''.join((output, 'Node: ' + str(x) + ' ' + repr(n), '\n\n')) - return output - - def __str__(self): - output = '' - for x, n in enumerate(self.nodes): - output = ''.join((output, 'Node: ' + str(x) + ' ' + str(n), '\n\n')) - return output - - -def make_cfg( - node, - project_modules, - local_modules, - filename, - module_definitions=None -): - visitor = ExprVisitor( - node, - project_modules, - local_modules, filename, - module_definitions - ) - return CFG( - visitor.nodes, - visitor.blackbox_assignments - ) +__all__ = ['make_cfg'] diff --git a/pyt/cfg/make_cfg.py 
b/pyt/cfg/make_cfg.py new file mode 100644 index 00000000..710c5f66 --- /dev/null +++ b/pyt/cfg/make_cfg.py @@ -0,0 +1,38 @@ +from .expr_visitor import ExprVisitor + + +class CFG(): + def __init__(self, nodes, blackbox_assignments): + self.nodes = nodes + self.blackbox_assignments = blackbox_assignments + + def __repr__(self): + output = '' + for x, n in enumerate(self.nodes): + output = ''.join((output, 'Node: ' + str(x) + ' ' + repr(n), '\n\n')) + return output + + def __str__(self): + output = '' + for x, n in enumerate(self.nodes): + output = ''.join((output, 'Node: ' + str(x) + ' ' + str(n), '\n\n')) + return output + + +def make_cfg( + node, + project_modules, + local_modules, + filename, + module_definitions=None +): + visitor = ExprVisitor( + node, + project_modules, + local_modules, filename, + module_definitions + ) + return CFG( + visitor.nodes, + visitor.blackbox_assignments + ) diff --git a/pyt/github_search.py b/pyt/github_search.py deleted file mode 100644 index e58a5ee3..00000000 --- a/pyt/github_search.py +++ /dev/null @@ -1,320 +0,0 @@ -import os -import re -import requests -import sys -import time -from abc import ( - ABCMeta, - abstractmethod -) -from datetime import ( - date, - datetime, - timedelta -) -from . 
import repo_runner -from .analysis.constraint_table import initialize_constraint_table -from .analysis.fixed_point import analyse -from .argument_helpers import VulnerabilityFiles -from .ast_helper import generate_ast -from .cfg import make_cfg -from .formatters import ( - json, - text -) -from .project_handler import ( - get_directory_modules, - get_modules -) -from .repo_runner import ( - add_repo_to_csv, - get_repos, - NoEntryPathError -) -from .vulnerabilities import find_vulnerabilities - - -DEFAULT_TIMEOUT_IN_SECONDS = 60 -GITHUB_API_URL = 'https://api.github.com/' -GITHUB_OAUTH_TOKEN = None -NUMBER_OF_REQUESTS_ALLOWED_PER_MINUTE = 30 # Rate limit is 10 and 30 with auth -SEARCH_CODE_URL = GITHUB_API_URL + '/search/code' -SEARCH_REPO_URL = GITHUB_API_URL + '/search/repositories' - - -def set_github_api_token(): - global GITHUB_OAUTH_TOKEN - try: - GITHUB_OAUTH_TOKEN = open( - 'github_access_token.pyt', - 'r' - ).read().strip() - except FileNotFoundError: - print('Insert your GitHub access token' - ' in the github_access_token.pyt file in the pyt package' - ' if you want to use GitHub search.') - exit(0) - - -class Query: - def __init__( - self, - base_url, - search_string, - repo=None, - time_interval=None, - per_page=100 - ): - repo = self._repo_parameter(repo) - time_interval = self._time_interval_parameter(time_interval) - search_string = self._search_parameter(search_string) - per_page = self._per_page_parameter(per_page) - parameters = self._construct_parameters([ - search_string, - 'language:python', - repo, - time_interval, - per_page - ]) - self.query_string = self._construct_query(base_url, parameters) - - def _construct_query(self, base_url, parameters): - query = base_url - query += '+'.join(parameters) - return query - - def _construct_parameters(self, parameters): - r = list() - for p in parameters: - if p: - r.append(p) - return r - - def _search_parameter(self, search_string): - return '?q="' + search_string + '"' - - def 
_repo_parameter(self, repo): - if repo: - return 'repo:' + repo.name - else: - return None - - def _time_interval_parameter(self, created): - if created: - p = re.compile('\d\d\d\d-\d\d-\d\d \.\. \d\d\d\d-\d\d-\d\d') - m = p.match(created) - if m.group(): - return 'created:"' + m.group() + '"' - else: - print('The time interval parameter should be ' - 'of the form: "YYYY-MM-DD .. YYYY-MM-DD"') - exit(1) - return None - - def _per_page_parameter(self, per_page): - if per_page > 100: - print('The GitHub api does not allow pages with over 100 results.') - exit(1) - return '&per_page={}'.format(per_page) - - -class IncompleteResultsError(Exception): - pass - - -class RequestCounter: - def __init__(self, timeout=DEFAULT_TIMEOUT_IN_SECONDS): - self.timeout_in_seconds = timeout # timeout in seconds - self.counter = list() - - def append(self, request_time): - if len(self.counter) < NUMBER_OF_REQUESTS_ALLOWED_PER_MINUTE: - self.counter.append(request_time) - else: - delta = request_time - self.counter[0] - if delta.seconds < self.timeout_in_seconds: - print( - 'Maximum requests "{}" reached' - ' timing out for {} seconds.' 
- .format( - len(self.counter), - self.timeout_in_seconds - delta.seconds - ) - ) - self.timeout(self.timeout_in_seconds - delta.seconds) - self.counter.pop(0) # pop index 0 - self.counter.append(datetime.now()) - else: - self.counter.pop(0) # pop index 0 - self.counter.append(request_time) - - def timeout(self, time_in_seconds=DEFAULT_TIMEOUT_IN_SECONDS): - time.sleep(time_in_seconds) - - -class Search(metaclass=ABCMeta): - request_counter = RequestCounter() - - def __init__(self, query): - self.total_count = None - self.incomplete_results = None - self.results = list() - self._request(query.query_string) - - def _request(self, query_string): - Search.request_counter.append(datetime.now()) - - print('Making request: {}'.format(query_string)) - - headers = {'Authorization': 'token ' + GITHUB_OAUTH_TOKEN} - r = requests.get(query_string, headers=headers) - - response_body = r.json() - - if r.status_code != 200: - print('Bad request:') - print(r.status_code) - print(response_body) - Search.request_counter.timeout() - self._request(query_string) - return - - self.total_count = response_body['total_count'] - print('Number of results: {}.'.format(self.total_count)) - self.incomplete_results = response_body['incomplete_results'] - if self.incomplete_results: - raise IncompleteResultsError() - self.parse_results(response_body['items']) - - @abstractmethod - def parse_results(self, json_results): - pass - - -class SearchRepo(Search): - def parse_results(self, json_results): - for item in json_results: - self.results.append(Repo(item)) - - -class SearchCode(Search): - def parse_results(self, json_results): - for item in json_results: - self.results.append(File(item)) - - -class File: - def __init__(self, item): - self.name = item['name'] - self.repo = Repo(item['repository']) - - -class Repo: - def __init__(self, item): - self.url = item['html_url'] - self.name = item['full_name'] - - -def get_dates( - start_date, - end_date=date.today() -): - interval = 7 - delta = 
end_date - start_date - for i in range((delta.days // interval) + 1): - yield ( - start_date + timedelta(days=i * interval), - start_date + timedelta(days=i * interval + interval) - ) - - -def analyse_repo( - args, - github_repo, - ui_mode -): - directory = os.path.dirname(github_repo.path) - project_modules = get_modules(directory) - local_modules = get_directory_modules(directory) - tree = generate_ast(github_repo.path) - cfg = make_cfg( - tree, - project_modules, - local_modules, - github_repo.path - ) - cfg_list = list(cfg) - - initialize_constraint_table(cfg_list) - analyse(cfg_list) - vulnerabilities = find_vulnerabilities( - cfg_list, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ) - ) - return vulnerabilities - - -def scan_github( - cmd_line_args, - ui_mode -): - for range_start, range_end in get_dates(cmd_line_args.start_date): - query = Query( - SEARCH_REPO_URL, - cmd_line_args.search_string, - time_interval='{} .. {}'.format( - range_start, - range_end - ), - per_page=100 - ) - search_repos = SearchRepo(query) - for repo in search_repos.results: - query = Query( - SEARCH_CODE_URL, - 'app = Flask(__name__)', - repo - ) - search_code = SearchCode(query) - if search_code.results: - repo = repo_runner.Repo(repo.url) - try: - repo.clone() - except NoEntryPathError as err: - print('NoEntryPathError for {}'.format(repo.url)) - continue - vulnerabilities = analyse_repo( - cmd_line_args, - repo, - ui_mode - ) - with open(repo.path + '.pyt', 'a') as fd: - if cmd_line_args.json: - json.report(vulnerabilities, fd) - else: - text.report(vulnerabilities, fd) - - if vulnerabilities: - add_repo_to_csv(cmd_line_args.csv_path, repo) - repo.clean_up() - - -def analyse_repos(cmd_line_args, ui_mode): - repos = get_repos(cmd_line_args.git_repos) - for repo in repos: - repo.clone() - vulnerabilities = analyse_repo( - cmd_line_args, - repo, - ui_mode - ) - if cmd_line_args.json: - json.report(vulnerabilities, sys.stdout) - else: - 
text.report(vulnerabilities, sys.stdout) - if not vulnerabilities: - repo.clean_up() diff --git a/pyt/repo_runner.py b/pyt/repo_runner.py deleted file mode 100644 index 7d6acba9..00000000 --- a/pyt/repo_runner.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Runs PyT on a CSV file of git repos.""" -import os -import shutil - -import git - - -DEFAULT_CSV_PATH = 'flask_open_source_apps.csv' - - -class NoEntryPathError(Exception): - pass - - -class Repo: - """Holder for a repo with git URL and - a path to where the analysis should start.""" - - def __init__( - self, - URL, - path=None - ): - self.URL = URL.strip() - self.directory = None - self.path = path.strip() if path else None - - def clone(self): - """Clone repo and update path to match the current one.""" - - repo = self.URL.split('/')[-1].split('.') - if len(repo) > 1: - self.directory = '.'.join(repo[:-1]) - else: - self.directory = repo[0] - - if self.directory not in os.listdir(): - git.Git().clone(self.URL) - - if self.path is None: - self._find_entry_path() - elif self.path[0] == '/': - self.path = self.path[1:] - self.path = os.path.join(self.directory, self.path) - - def _find_entry_path(self): - for root, dirs, files in os.walk(self.directory): - for f in files: - if f.endswith('.py'): - with open(os.path.join(root, f), 'r') as fd: - if 'app = Flask(__name__)' in fd.read(): - self.path = os.path.join(root, f) - return - raise NoEntryPathError( - 'No entry path found in repo {}.' 
- .format(self.URL) - ) - - def clean_up(self): - """Deletes the repo""" - shutil.rmtree(self.directory) - - -def get_repos(csv_path): - """Parses a CSV file containing repos.""" - repos = list() - with open(csv_path, 'r') as fd: - for line in fd: - url, path = line.split(',') - repos.append(Repo(url, path)) - return repos - - -def add_repo_to_csv( - repo, - csv_path=DEFAULT_CSV_PATH -): - try: - with open(csv_path, 'a') as fd: - fd.write( - '{}{}, {}'.format( - os.linesep, - repo.URL, - repo.path - ) - ) - except FileNotFoundError: - print('-csv file not used and fallback path not found: {}' - .format(DEFAULT_CSV_PATH)) - print('To specify the csv_path ' - 'use the "-csv" option.') - exit(1) diff --git a/pyt/vulnerabilities/__init__.py b/pyt/vulnerabilities/__init__.py index 07224a49..cd9670bc 100644 --- a/pyt/vulnerabilities/__init__.py +++ b/pyt/vulnerabilities/__init__.py @@ -1,40 +1,4 @@ -import json +from .vulnerabilities import find_vulnerabilities +from .vulnerability_helper import UImode -from ..analysis.lattice import Lattice -from .trigger_definitions_parser import parse -from .vulnerabilities import find_vulnerabilities_in_cfg - - -def find_vulnerabilities( - cfg_list, - ui_mode, - vulnerability_files -): - """Find vulnerabilities in a list of CFGs from a trigger_word_file. - - Args: - cfg_list(list[CFG]): the list of CFGs to scan. - ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. - vulnerability_files(VulnerabilityFiles): contains trigger words and blackbox_mapping files - - Returns: - A list of vulnerabilities. 
- """ - vulnerabilities = list() - definitions = parse(vulnerability_files.triggers) - - with open(vulnerability_files.blackbox_mapping) as infile: - blackbox_mapping = json.load(infile) - for cfg in cfg_list: - find_vulnerabilities_in_cfg( - cfg, - definitions, - Lattice(cfg.nodes), - ui_mode, - blackbox_mapping, - vulnerabilities - ) - with open(vulnerability_files.blackbox_mapping, 'w') as outfile: - json.dump(blackbox_mapping, outfile, indent=4) - - return vulnerabilities +__all__ = ['find_vulnerabilities', 'UImode'] diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index fafa212c..5132f7b5 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -1,10 +1,10 @@ """Module for finding vulnerabilities based on a definitions file.""" import ast -from collections import namedtuple +import json from ..analysis.definition_chains import build_def_use_chain -from ..argument_helpers import UImode +from ..analysis.lattice import Lattice from ..node_types import ( AssignmentNode, BBorBInode, @@ -12,60 +12,18 @@ TaintedNode ) from ..right_hand_side_visitor import RHSVisitor +from .trigger_definitions_parser import parse from ..vars_visitor import VarsVisitor from .vulnerability_helper import ( + Sanitiser, + TriggerNode, + Triggers, vuln_factory, - VulnerabilityType + VulnerabilityType, + UImode ) -Sanitiser = namedtuple( - 'Sanitiser', - ( - 'trigger_word', - 'cfg_node' - ) -) -Triggers = namedtuple( - 'Triggers', - ( - 'sources', - 'sinks', - 'sanitiser_dict' - ) -) - - -class TriggerNode(): - def __init__(self, trigger_word, sanitisers, cfg_node, secondary_nodes=[]): - self.trigger_word = trigger_word - self.sanitisers = sanitisers - self.cfg_node = cfg_node - self.secondary_nodes = secondary_nodes - - def append(self, cfg_node): - if not cfg_node == self.cfg_node: - if self.secondary_nodes and cfg_node not in self.secondary_nodes: - self.secondary_nodes.append(cfg_node) - elif not 
self.secondary_nodes: - self.secondary_nodes = [cfg_node] - - def __repr__(self): - output = 'TriggerNode(' - - if self.trigger_word: - output = '{} trigger_word is {}, '.format( - output, - self.trigger_word - ) - - return ( - output + - 'sanitisers are {}, '.format(self.sanitisers) + - 'cfg_node is {})\n'.format(self.cfg_node) - ) - - def identify_triggers( cfg, sources, @@ -497,3 +455,40 @@ def find_vulnerabilities_in_cfg( ) if vulnerability: vulnerabilities_list.append(vulnerability) + + +def find_vulnerabilities( + cfg_list, + ui_mode, + blackbox_mapping_file, + source_sink_file +): + """Find vulnerabilities in a list of CFGs from a trigger_word_file. + + Args: + cfg_list(list[CFG]): the list of CFGs to scan. + ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. + blackbox_mapping_file(str) + source_sink_file(str) + + Returns: + A list of vulnerabilities. + """ + vulnerabilities = list() + definitions = parse(source_sink_file) + + with open(blackbox_mapping_file) as infile: + blackbox_mapping = json.load(infile) + for cfg in cfg_list: + find_vulnerabilities_in_cfg( + cfg, + definitions, + Lattice(cfg.nodes), + ui_mode, + blackbox_mapping, + vulnerabilities + ) + with open(blackbox_mapping_file, 'w') as outfile: + json.dump(blackbox_mapping, outfile, indent=4) + + return vulnerabilities diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index 832160e0..d31c5ef7 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -1,8 +1,7 @@ -"""This module contains vulnerability types and helpers. 
+"""This module contains vulnerability types, Enums, nodes and helpers.""" -It is only used in vulnerabilities.py -""" from enum import Enum +from collections import namedtuple class VulnerabilityType(Enum): @@ -12,6 +11,12 @@ class VulnerabilityType(Enum): UNKNOWN = 3 +class UImode(Enum): + INTERACTIVE = 0 + NORMAL = 1 + TRIM = 2 + + def vuln_factory(vulnerability_type): if vulnerability_type == VulnerabilityType.UNKNOWN: return UnknownVulnerability @@ -134,3 +139,52 @@ def __str__(self): '\nThis vulnerability is unknown due to: ' + str(self.unknown_assignment) ) + + +Sanitiser = namedtuple( + 'Sanitiser', + ( + 'trigger_word', + 'cfg_node' + ) +) + + +Triggers = namedtuple( + 'Triggers', + ( + 'sources', + 'sinks', + 'sanitiser_dict' + ) +) + + +class TriggerNode(): + def __init__(self, trigger_word, sanitisers, cfg_node, secondary_nodes=[]): + self.trigger_word = trigger_word + self.sanitisers = sanitisers + self.cfg_node = cfg_node + self.secondary_nodes = secondary_nodes + + def append(self, cfg_node): + if not cfg_node == self.cfg_node: + if self.secondary_nodes and cfg_node not in self.secondary_nodes: + self.secondary_nodes.append(cfg_node) + elif not self.secondary_nodes: + self.secondary_nodes = [cfg_node] + + def __repr__(self): + output = 'TriggerNode(' + + if self.trigger_word: + output = '{} trigger_word is {}, '.format( + output, + self.trigger_word + ) + + return ( + output + + 'sanitisers are {}, '.format(self.sanitisers) + + 'cfg_node is {})\n'.format(self.cfg_node) + ) diff --git a/tests/command_line_test.py b/tests/command_line_test.py deleted file mode 100644 index 6dbd79f2..00000000 --- a/tests/command_line_test.py +++ /dev/null @@ -1,34 +0,0 @@ -"""This just tests __main__.py""" -import sys -from contextlib import contextmanager -from io import StringIO - -from .base_test_case import BaseTestCase -from pyt.__main__ import parse_args - - -@contextmanager -def capture_sys_output(): - capture_out, capture_err = StringIO(), StringIO() - 
current_out, current_err = sys.stdout, sys.stderr - try: - sys.stdout, sys.stderr = capture_out, capture_err - yield capture_out, capture_err - finally: - sys.stdout, sys.stderr = current_out, current_err - - -class CommandLineTest(BaseTestCase): - def test_no_args(self): - with self.assertRaises(SystemExit): - with capture_sys_output() as (_, stderr): - parse_args([]) - - EXPECTED = """usage: python -m pyt [-h] (-f FILEPATH | -gr GIT_REPOS) [-pr PROJECT_ROOT] - [-csv CSV_PATH] [-t TRIGGER_WORD_FILE] - [-m BLACKBOX_MAPPING_FILE] [-py2] [-l LOG_LEVEL] - [-a ADAPTOR] [-j] [-b BASELINE] [-trim | -i] - {github_search} ...\n""" + \ - "python -m pyt: error: one of the arguments " + \ - "-f/--filepath -gr/--git-repos is required\n" - self.assertEqual(stderr.getvalue(), EXPECTED) diff --git a/tests/github_search_test.py b/tests/github_search_test.py deleted file mode 100644 index be1539b1..00000000 --- a/tests/github_search_test.py +++ /dev/null @@ -1,21 +0,0 @@ -import unittest -from datetime import date - -from pyt.github_search import get_dates - - -class GetDatesTest(unittest.TestCase): - def test_get_dates(self): - date_ranges = get_dates( - date(2018, 1, 1), - date(2018, 1, 31) - ) - EXPECTED_RANGE = ( - ('2018-01-01', '2018-01-08'), - ('2018-01-08', '2018-01-15'), - ('2018-01-15', '2018-01-22'), - ('2018-01-22', '2018-01-29') - ) - for date_range, expected_range in zip(date_ranges, EXPECTED_RANGE): - for date_, expected_date in zip(date_range, expected_range): - assert str(date_) == expected_date diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index ef6cceb5..b405383b 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -1,10 +1,7 @@ import os from .base_test_case import BaseTestCase -from pyt.argument_helpers import ( - UImode, - VulnerabilityFiles -) + from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import 
analyse from pyt.framework_adaptor import FrameworkAdaptor @@ -14,7 +11,10 @@ default_blackbox_mapping_file, default_trigger_word_file ) -from pyt.vulnerabilities import find_vulnerabilities +from pyt.vulnerabilities import ( + find_vulnerabilities, + UImode +) class EngineTest(BaseTestCase): @@ -37,10 +37,8 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, UImode.NORMAL, - VulnerabilityFiles( - default_blackbox_mapping_file, - default_trigger_word_file - ) + default_blackbox_mapping_file, + default_trigger_word_file ) def test_find_vulnerabilities_absolute_from_file_command_injection(self): diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index c67b36c1..649a0c28 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -2,17 +2,8 @@ from .base_test_case import BaseTestCase -from pyt.vulnerabilities import ( - find_vulnerabilities, - trigger_definitions_parser, - vulnerabilities -) from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse -from pyt.argument_helpers import ( - UImode, - VulnerabilityFiles -) from pyt.framework_adaptor import FrameworkAdaptor from pyt.framework_helper import ( is_django_view_function, @@ -24,6 +15,13 @@ default_blackbox_mapping_file, default_trigger_word_file ) +from pyt.vulnerabilities import ( + find_vulnerabilities, + trigger_definitions_parser, + UImode, + vulnerabilities +) + class EngineTest(BaseTestCase): def run_empty(self): @@ -135,10 +133,8 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, UImode.NORMAL, - VulnerabilityFiles( - default_blackbox_mapping_file, - default_trigger_word_file - ) + default_blackbox_mapping_file, + default_trigger_word_file ) def test_find_vulnerabilities_assign_other_var(self): @@ -529,10 +525,8 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, UImode.NORMAL, - VulnerabilityFiles( - default_blackbox_mapping_file, - 
trigger_word_file - ) + default_blackbox_mapping_file, + trigger_word_file ) def test_django_view_param(self): @@ -576,10 +570,8 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, UImode.NORMAL, - VulnerabilityFiles( - default_blackbox_mapping_file, - trigger_word_file - ) + default_blackbox_mapping_file, + trigger_word_file ) def test_self_is_not_tainted(self): From d1471afca18fdd07ccba7e7e8f81eb09269d44e0 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 23 Apr 2018 23:17:01 -0700 Subject: [PATCH 032/291] Delete baseline.py, move to vulnerability_helper --- pyt/__main__.py | 2 +- pyt/baseline.py | 13 ------------- pyt/usage.py | 7 +++---- pyt/vulnerabilities/__init__.py | 12 ++++++++++-- pyt/vulnerabilities/vulnerability_helper.py | 13 +++++++++++++ 5 files changed, 27 insertions(+), 20 deletions(-) delete mode 100644 pyt/baseline.py diff --git a/pyt/__main__.py b/pyt/__main__.py index b12bdf8b..dc6879b0 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -6,7 +6,6 @@ from .analysis.constraint_table import initialize_constraint_table from .analysis.fixed_point import analyse from .ast_helper import generate_ast -from .baseline import get_vulnerabilities_not_in_baseline from .cfg import make_cfg from .formatters import ( json, @@ -26,6 +25,7 @@ from .usage import parse_args from .vulnerabilities import ( find_vulnerabilities, + get_vulnerabilities_not_in_baseline, UImode ) diff --git a/pyt/baseline.py b/pyt/baseline.py deleted file mode 100644 index 1dc3128b..00000000 --- a/pyt/baseline.py +++ /dev/null @@ -1,13 +0,0 @@ -import json - - -def get_vulnerabilities_not_in_baseline( - vulnerabilities, - baseline_file -): - baseline = json.load(open(baseline_file)) - output = list() - for vuln in vulnerabilities: - if vuln.as_dict() not in baseline['vulnerabilities']: - output.append(vuln) - return(output) diff --git a/pyt/usage.py b/pyt/usage.py index 17d4587b..37fa3bc2 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -1,9 +1,6 @@ import 
argparse import os -from datetime import ( - date, - datetime -) +from datetime import datetime default_blackbox_mapping_file = os.path.join( @@ -97,6 +94,7 @@ def _add_print_group(parser): default=False ) + def _check_required_and_mutually_exclusive_args(parser, args): if args.filepath is None and args.git_repos is None: parser.error('one of the arguments -f/--filepath -gr/--git-repos is required') @@ -105,6 +103,7 @@ def _check_required_and_mutually_exclusive_args(parser, args): if args.trim_reassigned_in and args.interactive: parser.error('argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in') + def parse_args(args): if len(args) == 0: args.append('-h') diff --git a/pyt/vulnerabilities/__init__.py b/pyt/vulnerabilities/__init__.py index cd9670bc..992af18a 100644 --- a/pyt/vulnerabilities/__init__.py +++ b/pyt/vulnerabilities/__init__.py @@ -1,4 +1,12 @@ from .vulnerabilities import find_vulnerabilities -from .vulnerability_helper import UImode +from .vulnerability_helper import ( + get_vulnerabilities_not_in_baseline, + UImode +) -__all__ = ['find_vulnerabilities', 'UImode'] + +__all__ = [ + 'find_vulnerabilities', + 'get_vulnerabilities_not_in_baseline', + 'UImode' +] diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index d31c5ef7..bd2407dd 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -1,5 +1,6 @@ """This module contains vulnerability types, Enums, nodes and helpers.""" +import json from enum import Enum from collections import namedtuple @@ -188,3 +189,15 @@ def __repr__(self): 'sanitisers are {}, '.format(self.sanitisers) + 'cfg_node is {})\n'.format(self.cfg_node) ) + + +def get_vulnerabilities_not_in_baseline( + vulnerabilities, + baseline_file +): + baseline = json.load(open(baseline_file)) + output = list() + for vuln in vulnerabilities: + if vuln.as_dict() not in baseline['vulnerabilities']: + 
output.append(vuln) + return(output) From 7ce2d58d58f3443cd6500a0427bbf1e6c3e4d526 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 23 Apr 2018 23:36:59 -0700 Subject: [PATCH 033/291] Make core, web_frameworks and helper_visitors directories --- pyt/{ => core}/ast_helper.py | 0 pyt/{ => core}/module_definitions.py | 0 pyt/{ => core}/node_types.py | 0 pyt/{ => core}/project_handler.py | 0 pyt/{ => helper_visitors}/label_visitor.py | 0 pyt/{ => helper_visitors}/right_hand_side_visitor.py | 0 pyt/{ => helper_visitors}/vars_visitor.py | 0 pyt/{ => web_frameworks}/framework_adaptor.py | 0 pyt/{ => web_frameworks}/framework_helper.py | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename pyt/{ => core}/ast_helper.py (100%) rename pyt/{ => core}/module_definitions.py (100%) rename pyt/{ => core}/node_types.py (100%) rename pyt/{ => core}/project_handler.py (100%) rename pyt/{ => helper_visitors}/label_visitor.py (100%) rename pyt/{ => helper_visitors}/right_hand_side_visitor.py (100%) rename pyt/{ => helper_visitors}/vars_visitor.py (100%) rename pyt/{ => web_frameworks}/framework_adaptor.py (100%) rename pyt/{ => web_frameworks}/framework_helper.py (100%) diff --git a/pyt/ast_helper.py b/pyt/core/ast_helper.py similarity index 100% rename from pyt/ast_helper.py rename to pyt/core/ast_helper.py diff --git a/pyt/module_definitions.py b/pyt/core/module_definitions.py similarity index 100% rename from pyt/module_definitions.py rename to pyt/core/module_definitions.py diff --git a/pyt/node_types.py b/pyt/core/node_types.py similarity index 100% rename from pyt/node_types.py rename to pyt/core/node_types.py diff --git a/pyt/project_handler.py b/pyt/core/project_handler.py similarity index 100% rename from pyt/project_handler.py rename to pyt/core/project_handler.py diff --git a/pyt/label_visitor.py b/pyt/helper_visitors/label_visitor.py similarity index 100% rename from pyt/label_visitor.py rename to pyt/helper_visitors/label_visitor.py diff --git 
a/pyt/right_hand_side_visitor.py b/pyt/helper_visitors/right_hand_side_visitor.py similarity index 100% rename from pyt/right_hand_side_visitor.py rename to pyt/helper_visitors/right_hand_side_visitor.py diff --git a/pyt/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py similarity index 100% rename from pyt/vars_visitor.py rename to pyt/helper_visitors/vars_visitor.py diff --git a/pyt/framework_adaptor.py b/pyt/web_frameworks/framework_adaptor.py similarity index 100% rename from pyt/framework_adaptor.py rename to pyt/web_frameworks/framework_adaptor.py diff --git a/pyt/framework_helper.py b/pyt/web_frameworks/framework_helper.py similarity index 100% rename from pyt/framework_helper.py rename to pyt/web_frameworks/framework_helper.py From 9a4555e724b2b0f629ad5920abaa8ae20664c71e Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 13:15:22 -0700 Subject: [PATCH 034/291] Make tests pass with the 'Make core, web_frameworks and helper_visitors directories' change --- pyt/__main__.py | 14 +- pyt/analysis/definition_chains.py | 2 +- pyt/analysis/lattice.py | 2 +- pyt/analysis/reaching_definitions_taint.py | 2 +- pyt/cfg/expr_visitor.py | 26 ++-- pyt/cfg/expr_visitor_helper.py | 2 +- pyt/cfg/stmt_visitor.py | 18 ++- pyt/cfg/stmt_visitor_helper.py | 2 +- pyt/core/node_types.py | 2 +- pyt/helper_visitors/__init__.py | 10 ++ pyt/helper_visitors/vars_visitor.py | 2 +- pyt/vulnerabilities/vulnerabilities.py | 8 +- pyt/web_frameworks/__init__.py | 20 +++ pyt/web_frameworks/framework_adaptor.py | 8 +- pyt/web_frameworks/framework_helper.py | 18 +-- tests/__main__.py | 6 +- tests/base_test_case.py | 4 +- tests/cfg_test.py | 144 +++++++++++++++------ tests/framework_helper_test.py | 11 +- tests/import_test.py | 34 ++--- tests/label_visitor_test.py | 42 +++--- tests/nested_functions_test.py | 6 +- tests/project_handler_test.py | 26 ++-- tests/vars_visitor_test.py | 3 +- tests/vulnerabilities_across_files_test.py | 11 +- tests/vulnerabilities_test.py | 14 +- 26 
files changed, 274 insertions(+), 163 deletions(-) create mode 100644 pyt/helper_visitors/__init__.py create mode 100644 pyt/web_frameworks/__init__.py diff --git a/pyt/__main__.py b/pyt/__main__.py index dc6879b0..7831a177 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -11,13 +11,6 @@ json, text ) -from .framework_adaptor import FrameworkAdaptor -from .framework_helper import ( - is_django_view_function, - is_flask_route_function, - is_function, - is_function_without_leading_ -) from .project_handler import ( get_directory_modules, get_modules @@ -28,6 +21,13 @@ get_vulnerabilities_not_in_baseline, UImode ) +from pyt.web_frameworks import ( + FrameworkAdaptor, + is_django_view_function, + is_flask_route_function, + is_function, + is_function_without_leading_ +) def main(command_line_args=sys.argv[1:]): diff --git a/pyt/analysis/definition_chains.py b/pyt/analysis/definition_chains.py index e4c79c83..fad898b6 100644 --- a/pyt/analysis/definition_chains.py +++ b/pyt/analysis/definition_chains.py @@ -1,7 +1,7 @@ from collections import defaultdict from .constraint_table import constraint_table -from ..node_types import AssignmentNode +from ..core.node_types import AssignmentNode def get_constraint_nodes( diff --git a/pyt/analysis/lattice.py b/pyt/analysis/lattice.py index 148237c2..5ffcec11 100644 --- a/pyt/analysis/lattice.py +++ b/pyt/analysis/lattice.py @@ -1,5 +1,5 @@ from .constraint_table import constraint_table -from ..node_types import AssignmentNode +from ..core.node_types import AssignmentNode def get_lattice_elements(cfg_nodes): diff --git a/pyt/analysis/reaching_definitions_taint.py b/pyt/analysis/reaching_definitions_taint.py index 6ec26e01..dad8e9e1 100644 --- a/pyt/analysis/reaching_definitions_taint.py +++ b/pyt/analysis/reaching_definitions_taint.py @@ -2,8 +2,8 @@ constraint_join, constraint_table ) +from ..core.node_types import AssignmentNode from .lattice import Lattice -from ..node_types import AssignmentNode class 
ReachingDefinitionsTaintAnalysis(): diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index 2301fce6..b4a96168 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -1,20 +1,12 @@ import ast -from .alias_helper import ( - handle_aliases_in_calls -) -from ..ast_helper import ( +from .alias_helper import handle_aliases_in_calls +from ..core.ast_helper import ( Arguments, get_call_names_as_string ) -from .expr_visitor_helper import ( - BUILTINS, - return_connection_handler, - SavedVariable -) -from ..label_visitor import LabelVisitor -from ..module_definitions import ModuleDefinitions -from ..node_types import ( +from ..core.module_definitions import ModuleDefinitions +from ..core.node_types import ( AssignmentCallNode, AssignmentNode, BBorBInode, @@ -25,7 +17,15 @@ RestoreNode, ReturnNode ) -from ..right_hand_side_visitor import RHSVisitor +from .expr_visitor_helper import ( + BUILTINS, + return_connection_handler, + SavedVariable +) +from ..helper_visitors import ( + LabelVisitor, + RHSVisitor +) from .stmt_visitor import StmtVisitor from .stmt_visitor_helper import CALL_IDENTIFIER diff --git a/pyt/cfg/expr_visitor_helper.py b/pyt/cfg/expr_visitor_helper.py index 27e1df78..9667f7c1 100644 --- a/pyt/cfg/expr_visitor_helper.py +++ b/pyt/cfg/expr_visitor_helper.py @@ -1,6 +1,6 @@ from collections import namedtuple -from ..node_types import ConnectToExitNode +from ..core.node_types import ConnectToExitNode SavedVariable = namedtuple( diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 85ed523c..06a985e5 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -9,17 +9,16 @@ not_as_alias_handler, retrieve_import_alias_mapping ) -from ..ast_helper import ( +from ..core.ast_helper import ( generate_ast, get_call_names_as_string ) -from ..label_visitor import LabelVisitor -from ..module_definitions import ( +from ..core.module_definitions import ( LocalModuleDefinition, ModuleDefinition, ModuleDefinitions ) -from 
..node_types import ( +from ..core.node_types import ( AssignmentNode, AssignmentCallNode, BBorBInode, @@ -33,8 +32,14 @@ ReturnNode, TryNode ) -from ..project_handler import get_directory_modules -from ..right_hand_side_visitor import RHSVisitor +from ..core.project_handler import ( + get_directory_modules +) +from ..helper_visitors import ( + LabelVisitor, + RHSVisitor, + VarsVisitor +) from .stmt_visitor_helper import ( CALL_IDENTIFIER, ConnectStatements, @@ -45,7 +50,6 @@ get_last_statements, remove_breaks ) -from ..vars_visitor import VarsVisitor class StmtVisitor(ast.NodeVisitor): diff --git a/pyt/cfg/stmt_visitor_helper.py b/pyt/cfg/stmt_visitor_helper.py index a52f22b6..407df31f 100644 --- a/pyt/cfg/stmt_visitor_helper.py +++ b/pyt/cfg/stmt_visitor_helper.py @@ -2,7 +2,7 @@ import random from collections import namedtuple -from ..node_types import ( +from ..core.node_types import ( AssignmentCallNode, BBorBInode, BreakNode, diff --git a/pyt/core/node_types.py b/pyt/core/node_types.py index 3819963a..6cc2f1eb 100644 --- a/pyt/core/node_types.py +++ b/pyt/core/node_types.py @@ -1,7 +1,7 @@ """This module contains all of the CFG nodes types.""" from collections import namedtuple -from .label_visitor import LabelVisitor +from ..helper_visitors import LabelVisitor ControlFlowNode = namedtuple( diff --git a/pyt/helper_visitors/__init__.py b/pyt/helper_visitors/__init__.py new file mode 100644 index 00000000..ffd5f878 --- /dev/null +++ b/pyt/helper_visitors/__init__.py @@ -0,0 +1,10 @@ +from .label_visitor import LabelVisitor +from .right_hand_side_visitor import RHSVisitor +from .vars_visitor import VarsVisitor + + +__all__ = [ + 'LabelVisitor', + 'RHSVisitor', + 'VarsVisitor' +] diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index f64abd9e..bda24c9b 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -1,7 +1,7 @@ import ast import itertools -from .ast_helper import get_call_names 
+from ..core.ast_helper import get_call_names class VarsVisitor(ast.NodeVisitor): diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 5132f7b5..e8786db8 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -5,15 +5,17 @@ from ..analysis.definition_chains import build_def_use_chain from ..analysis.lattice import Lattice -from ..node_types import ( +from ..core.node_types import ( AssignmentNode, BBorBInode, IfNode, TaintedNode ) -from ..right_hand_side_visitor import RHSVisitor +from ..helper_visitors import ( + RHSVisitor, + VarsVisitor +) from .trigger_definitions_parser import parse -from ..vars_visitor import VarsVisitor from .vulnerability_helper import ( Sanitiser, TriggerNode, diff --git a/pyt/web_frameworks/__init__.py b/pyt/web_frameworks/__init__.py new file mode 100644 index 00000000..e764e8a5 --- /dev/null +++ b/pyt/web_frameworks/__init__.py @@ -0,0 +1,20 @@ +from .framework_adaptor import ( + FrameworkAdaptor, + _get_func_nodes +) +from .framework_helper import ( + is_django_view_function, + is_flask_route_function, + is_function, + is_function_without_leading_ +) + + +__all__ = [ + 'FrameworkAdaptor', + 'is_django_view_function', + 'is_flask_route_function', + 'is_function', + 'is_function_without_leading_', + '_get_func_nodes' # Only used in framework_helper_test +] diff --git a/pyt/web_frameworks/framework_adaptor.py b/pyt/web_frameworks/framework_adaptor.py index 00f5a07b..2bc4d7ee 100644 --- a/pyt/web_frameworks/framework_adaptor.py +++ b/pyt/web_frameworks/framework_adaptor.py @@ -2,10 +2,10 @@ import ast -from .ast_helper import Arguments -from .cfg import make_cfg -from .module_definitions import project_definitions -from .node_types import ( +from ..cfg import make_cfg +from ..core.ast_helper import Arguments +from ..core.module_definitions import project_definitions +from ..core.node_types import ( AssignmentNode, TaintedNode ) diff --git 
a/pyt/web_frameworks/framework_helper.py b/pyt/web_frameworks/framework_helper.py index fd5996fa..7156968a 100644 --- a/pyt/web_frameworks/framework_helper.py +++ b/pyt/web_frameworks/framework_helper.py @@ -1,12 +1,14 @@ """Provides helper functions that help with determining if a function is a route function.""" import ast -from .ast_helper import get_call_names +from ..core.ast_helper import get_call_names -def is_function(function): - """Always returns true because arg is always a function.""" - return True +def is_django_view_function(ast_node): + if len(ast_node.args.args): + first_arg_name = ast_node.args.args[0].arg + return first_arg_name == 'request' + return False def is_flask_route_function(ast_node): @@ -18,11 +20,9 @@ def is_flask_route_function(ast_node): return False -def is_django_view_function(ast_node): - if len(ast_node.args.args): - first_arg_name = ast_node.args.args[0].arg - return first_arg_name == 'request' - return False +def is_function(function): + """Always returns true because arg is always a function.""" + return True def is_function_without_leading_(ast_node): diff --git a/tests/__main__.py b/tests/__main__.py index 8fad5457..61f69077 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -1,4 +1,8 @@ -from unittest import TestLoader, TestSuite, TextTestRunner +from unittest import ( + TestLoader, + TestSuite, + TextTestRunner +) test_suite = TestSuite() diff --git a/tests/base_test_case.py b/tests/base_test_case.py index 2683aae1..c0df17f2 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -1,9 +1,9 @@ """A module that contains a base class that has helper methods for testing PyT.""" import unittest -from pyt.ast_helper import generate_ast from pyt.cfg import make_cfg -from pyt.module_definitions import project_definitions +from pyt.core.ast_helper import generate_ast +from pyt.core.module_definitions import project_definitions class BaseTestCase(unittest.TestCase): diff --git a/tests/cfg_test.py 
b/tests/cfg_test.py index f3a3a28a..0f9ba2d3 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -1,5 +1,9 @@ from .base_test_case import BaseTestCase -from pyt.node_types import EntryOrExitNode, Node + +from pyt.core.node_types import ( + EntryOrExitNode, + Node +) class CFGGeneralTest(BaseTestCase): @@ -8,15 +12,11 @@ def test_repr_cfg(self): self.nodes = self.cfg_list_to_dict(self.cfg.nodes) - #print(repr(self.cfg)) - def test_str_cfg(self): self.cfg_create_from_file('examples/example_inputs/for_complete.py') self.nodes = self.cfg_list_to_dict(self.cfg.nodes) - #print(self.cfg) - def test_no_tuples(self): self.cfg_create_from_file('examples/example_inputs/for_complete.py') @@ -35,7 +35,10 @@ def test_start_and_exit_nodes(self): node = 1 exit_node = 2 - self.assertInCfg([(1,0),(2,1)]) + self.assertInCfg([ + (node, start_node), + (exit_node, node) + ]) self.assertEqual(type(self.cfg.nodes[start_node]), EntryOrExitNode) self.assertEqual(type(self.cfg.nodes[exit_node]), EntryOrExitNode) @@ -72,7 +75,7 @@ def test_for_complete(self): next_node = 6 exit_node = 7 - self.assertEqual(self.cfg.nodes[for_node].label,'for x in range(3):') + self.assertEqual(self.cfg.nodes[for_node].label, 'for x in range(3):') self.assertEqual(self.cfg.nodes[body_1].label, '~call_1 = ret_print(x)') self.assertEqual(self.cfg.nodes[body_2].label, 'y += 1') self.assertEqual(self.cfg.nodes[else_body_1].label, "~call_2 = ret_print('Final: %s' % x)") @@ -107,15 +110,23 @@ def test_for_no_orelse(self): def test_for_tuple_target(self): self.cfg_create_from_file('examples/example_inputs/for_tuple_target.py') - self.assert_length(self.cfg.nodes, expected_length = 4) + self.assert_length(self.cfg.nodes, expected_length=4) entry_node = 0 for_node = 1 print_node = 2 exit_node = 3 - self.assertInCfg([(for_node,entry_node),(print_node,for_node),(for_node,print_node),(exit_node,for_node)]) - self.assertEqual(self.cfg.nodes[for_node].label, "for (x, y) in [(1, 2), (3, 4)]:") + self.assertInCfg([ + 
(for_node, entry_node), + (print_node, for_node), + (for_node, print_node), + (exit_node, for_node) + ]) + self.assertEqual( + self.cfg.nodes[for_node].label, + "for (x, y) in [(1, 2), (3, 4)]:" + ) def test_for_line_numbers(self): self.cfg_create_from_file('examples/example_inputs/for_complete.py') @@ -152,16 +163,19 @@ def test_for_func_iterator(self): _print = 7 _exit = 8 - self.assertInCfg([(_for, entry), - (_for, call_foo), - (_for, _print), - (entry_foo, _for), - (call_to_range, entry_foo), - (ret_foo, call_to_range), - (exit_foo, ret_foo), - (call_foo, exit_foo), - (_print, _for), - (_exit, _for)]) + self.assertInCfg([ + (_for, entry), + (_for, call_foo), + (_for, _print), + (entry_foo, _for), + (call_to_range, entry_foo), + (ret_foo, call_to_range), + (exit_foo, ret_foo), + (call_foo, exit_foo), + (_print, _for), + (_exit, _for) + ]) + class CFGTryTest(BaseTestCase): def connected(self, node, successor): @@ -200,14 +214,14 @@ def test_orelse(self): print_a5 = 3 except_im = 4 except_im_body_1 = 5 - value_equal_call_2 = 6 # value = ~call_2 + value_equal_call_2 = 6 # value = ~call_2 print_wagyu = 7 save_node = 8 assign_to_temp = 9 assign_from_temp = 10 function_entry = 11 ret_of_subprocess_call = 12 - ret_does_this_kill_us_equal_call_5 = 13 # ret_does_this_kill_us = ~call_5 + ret_does_this_kill_us_equal_call_5 = 13 # ret_does_this_kill_us = ~call_5 function_exit = 14 restore_node = 15 return_handler = 16 @@ -297,8 +311,18 @@ def test_if_complete(self): self.assertEqual(self.cfg.nodes[else_body].label, 'x += 4') self.assertEqual(self.cfg.nodes[next_node].label, 'x += 5') - - self.assertInCfg([(test, entry), (eliftest, test), (body_1, test), (body_2, body_1), (next_node, body_2), (else_body, eliftest), (elif_body, eliftest), (next_node, elif_body), (next_node, else_body), (exit_node, next_node)]) + self.assertInCfg([ + (test, entry), + (eliftest, test), + (body_1, test), + (body_2, body_1), + (next_node, body_2), + (else_body, eliftest), + (elif_body, eliftest), 
+ (next_node, elif_body), + (next_node, else_body), + (exit_node, next_node) + ]) def test_single_if(self): self.cfg_create_from_file('examples/example_inputs/if.py') @@ -309,7 +333,13 @@ def test_single_if(self): test_node = 1 body_node = 2 exit_node = 3 - self.assertInCfg([(test_node,start_node), (body_node,test_node), (exit_node,test_node), (exit_node,body_node)]) + + self.assertInCfg([ + (test_node, start_node), + (body_node, test_node), + (exit_node, test_node), + (exit_node, body_node) + ]) def test_single_if_else(self): self.cfg_create_from_file('examples/example_inputs/if_else.py') @@ -321,7 +351,14 @@ def test_single_if_else(self): body_node = 2 else_body = 3 exit_node = 4 - self.assertInCfg([(test_node,start_node), (body_node,test_node), (else_body,test_node), (exit_node,else_body), (exit_node,body_node)]) + + self.assertInCfg([ + (test_node, start_node), + (body_node, test_node), + (else_body, test_node), + (exit_node, else_body), + (exit_node, body_node) + ]) def test_multiple_if_else(self): self.cfg_create_from_file('examples/example_inputs/multiple_if_else.py') @@ -409,7 +446,6 @@ def test_nested_if_else_elif(self): (_exit, elif_body) ]) - def test_if_line_numbers(self): self.cfg_create_from_file('examples/example_inputs/if_complete.py') @@ -443,7 +479,12 @@ def test_if_not(self): body = 2 _exit = 3 - self.assertInCfg([(1, 0), (2, 1), (3, 2), (3, 1)]) + self.assertInCfg([ + (_if, entry), + (body, _if), + (_exit, body), + (_exit, _if) + ]) class CFGWhileTest(BaseTestCase): @@ -464,7 +505,16 @@ def test_while_complete(self): self.assertEqual(self.cfg.nodes[test].label, 'while x > 0:') - self.assertInCfg([(test, entry), (body_1, test), (else_body_1, test), ( body_2, body_1), (test, body_2), (else_body_2, else_body_1), (next_node, else_body_2), (exit_node, next_node)]) + self.assertInCfg([ + (test, entry), + (body_1, test), + (else_body_1, test), + (body_2, body_1), + (test, body_2), + (else_body_2, else_body_1), + (next_node, else_body_2), + (exit_node, 
next_node) + ]) def test_while_no_orelse(self): self.cfg_create_from_file('examples/example_inputs/while_no_orelse.py') @@ -478,7 +528,14 @@ def test_while_no_orelse(self): next_node = 4 exit_node = 5 - self.assertInCfg([(test, entry), (body_1, test), ( next_node, test), (body_2, body_1), (test, body_2), (exit_node, next_node)]) + self.assertInCfg([ + (test, entry), + (body_1, test), + (next_node, test), + (body_2, body_1), + (test, body_2), + (exit_node, next_node) + ]) def test_while_line_numbers(self): self.cfg_create_from_file('examples/example_inputs/while_complete.py') @@ -509,7 +566,7 @@ def test_assignment_multi_target(self): start_node = 0 node = 1 node_2 = 2 - exit_node =3 + exit_node = 3 self.assertInCfg([(node, start_node), (node_2, node), (exit_node, node_2)]) @@ -520,15 +577,22 @@ def test_assignment_multi_target_call(self): self.cfg_create_from_file('examples/example_inputs/assignment_multiple_assign_call.py') self.assert_length(self.cfg.nodes, expected_length=6) - start_node = self.cfg.nodes[0] + + # start_node = self.cfg.nodes[0] assignment_to_call1 = self.cfg.nodes[1] assignment_to_x = self.cfg.nodes[2] assignment_to_call2 = self.cfg.nodes[3] assignment_to_y = self.cfg.nodes[4] - exit_node = self.cfg.nodes[5] + # exit_node = self.cfg.nodes[5] # This assert means N should be connected to N+1 - self.assertInCfg([(1,0),(2,1),(3,2),(4,3),(5,4)]) + self.assertInCfg([ + (1, 0), + (2, 1), + (3, 2), + (4, 3), + (5, 4) + ]) self.assertEqual(assignment_to_call1.label, '~call_1 = ret_int(5)') self.assertEqual(assignment_to_x.label, 'x = ~call_1') @@ -570,10 +634,10 @@ def test_multiple_assignment(self): self.assert_length(self.cfg.nodes, expected_length=4) - start_node = self.cfg.nodes[0] + # start_node = self.cfg.nodes[0] assign_y = self.cfg.nodes[1] assign_x = self.cfg.nodes[2] - exit_node = self.cfg.nodes[-1] + # exit_node = self.cfg.nodes[-1] self.assertEqual(assign_x.label, 'x = 5') self.assertEqual(assign_y.label, 'y = 5') @@ -652,6 +716,7 @@ def 
test_dict_comprehension_multi(self): self.assertEqual(listcomp.label, 'dd = {x + y : y for x in [1, 2, 3] for y in [4, 5, 6]}') + class CFGFunctionNodeTest(BaseTestCase): def connected(self, node, successor): return (successor, node) @@ -1007,9 +1072,6 @@ def test_multiple_blackbox_calls_in_user_defined_call_after_if(self): (_exit, ret_send_file) ]) - - - def test_multiple_user_defined_calls_in_blackbox_call_after_if(self): path = 'examples/vulnerable_code/multiple_user_defined_calls_in_blackbox_call_after_if.py' self.cfg_create_from_file(path) @@ -1105,8 +1167,6 @@ def test_call_on_call(self): self.cfg_create_from_file(path) - - class CFGCallWithAttributeTest(BaseTestCase): def setUp(self): self.cfg_create_from_file('examples/example_inputs/call_with_attribute.py') @@ -1126,6 +1186,7 @@ def test_call_with_attribute_line_numbers(self): self.assertLineNumber(call, 5) + class CFGBreak(BaseTestCase): """Break in while and for and other places""" def test_break(self): @@ -1178,7 +1239,6 @@ class CFGName(BaseTestCase): def test_name_if(self): self.cfg_create_from_file('examples/example_inputs/name_if.py') - self.assert_length(self.cfg.nodes, expected_length=5) self.assertEqual(self.cfg.nodes[2].label, 'if x:') diff --git a/tests/framework_helper_test.py b/tests/framework_helper_test.py index 9086e55d..07bd01c8 100644 --- a/tests/framework_helper_test.py +++ b/tests/framework_helper_test.py @@ -1,10 +1,11 @@ from .base_test_case import BaseTestCase -from pyt.framework_adaptor import _get_func_nodes -from pyt.framework_helper import ( + +from pyt.web_frameworks import ( is_django_view_function, is_flask_route_function, is_function, is_function_without_leading_, + _get_func_nodes ) @@ -12,7 +13,6 @@ class FrameworkEngineTest(BaseTestCase): def test_find_flask_functions(self): self.cfg_create_from_file('examples/example_inputs/django_flask_and_normal_functions.py') - cfg_list = [self.cfg] funcs = _get_func_nodes() i = 0 @@ -23,11 +23,9 @@ def 
test_find_flask_functions(self): # So it is supposed to be 1, because foo is not an app.route self.assertEqual(i, 1) - def test_find_every_function_without_leading_underscore(self): self.cfg_create_from_file('examples/example_inputs/django_flask_and_normal_functions.py') - cfg_list = [self.cfg] funcs = _get_func_nodes() i = 0 @@ -40,7 +38,6 @@ def test_find_every_function_without_leading_underscore(self): def test_find_every_function(self): self.cfg_create_from_file('examples/example_inputs/django_flask_and_normal_functions.py') - cfg_list = [self.cfg] funcs = _get_func_nodes() i = 0 @@ -53,7 +50,6 @@ def test_find_every_function(self): def test_find_django_functions(self): self.cfg_create_from_file('examples/example_inputs/django_flask_and_normal_functions.py') - cfg_list = [self.cfg] funcs = _get_func_nodes() i = 0 @@ -67,7 +63,6 @@ def test_find_django_functions(self): def test_find_django_views(self): self.cfg_create_from_file('examples/example_inputs/django_views.py') - cfg_list = [self.cfg] funcs = _get_func_nodes() i = 0 diff --git a/tests/import_test.py b/tests/import_test.py index 94d829b1..6c2320cc 100644 --- a/tests/import_test.py +++ b/tests/import_test.py @@ -2,8 +2,12 @@ import os from .base_test_case import BaseTestCase -from pyt.ast_helper import get_call_names_as_string -from pyt.project_handler import get_directory_modules, get_modules_and_packages + +from pyt.core.ast_helper import get_call_names_as_string +from pyt.core.project_handler import ( + get_directory_modules, + get_modules_and_packages +) class ImportTest(BaseTestCase): @@ -184,17 +188,18 @@ def test_from_directory(self): self.cfg_create_from_file(file_path, project_modules, local_modules) - - EXPECTED = ["Entry module", - "Module Entry bar", - "Module Exit bar", - "temp_1_s = 'hey'", - "s = temp_1_s", - "Function Entry bar.H", - "ret_bar.H = s + 'end'", - "Exit bar.H", - "~call_1 = ret_bar.H", - "Exit module"] + EXPECTED = [ + "Entry module", + "Module Entry bar", + "Module Exit bar", 
+ "temp_1_s = 'hey'", + "s = temp_1_s", + "Function Entry bar.H", + "ret_bar.H = s + 'end'", + "Exit bar.H", + "~call_1 = ret_bar.H", + "Exit module" + ] for node, expected_label in zip(self.cfg.nodes, EXPECTED): self.assertEqual(node.label, expected_label) @@ -313,7 +318,6 @@ def test_from_dot(self): 'c = ~call_1', 'Exit module'] - for node, expected_label in zip(self.cfg.nodes, EXPECTED): self.assertEqual(node.label, expected_label) @@ -338,7 +342,6 @@ def test_from_dot_dot(self): 'c = ~call_1', 'Exit module'] - for node, expected_label in zip(self.cfg.nodes, EXPECTED): self.assertEqual(node.label, expected_label) @@ -446,7 +449,6 @@ def test_multiple_functions_with_aliases(self): "c = ~call_3", "Exit module"] - for node, expected_label in zip(self.cfg.nodes, EXPECTED): self.assertEqual(node.label, expected_label) diff --git a/tests/label_visitor_test.py b/tests/label_visitor_test.py index 6ee44bb4..0f2d2f7d 100644 --- a/tests/label_visitor_test.py +++ b/tests/label_visitor_test.py @@ -1,7 +1,7 @@ import ast import unittest -from pyt.label_visitor import LabelVisitor +from pyt.helper_visitors import LabelVisitor class LabelVisitorTestCase(unittest.TestCase): @@ -14,64 +14,60 @@ def perform_labeling_on_expression(self, expr): return label + class LabelVisitorTest(LabelVisitorTestCase): def test_assign(self): label = self.perform_labeling_on_expression('a = 1') - self.assertEqual(label.result,'a = 1') + self.assertEqual(label.result, 'a = 1') def test_augassign(self): label = self.perform_labeling_on_expression('a +=2') - self.assertEqual(label.result,'a += 2') + self.assertEqual(label.result, 'a += 2') def test_compare_simple(self): label = self.perform_labeling_on_expression('a > b') - self.assertEqual(label.result,'a > b') + self.assertEqual(label.result, 'a > b') def test_compare_multi(self): label = self.perform_labeling_on_expression('a > b > c') - self.assertEqual(label.result,'a > b > c') + self.assertEqual(label.result, 'a > b > c') def test_binop(self): 
label = self.perform_labeling_on_expression('a / b') - self.assertEqual(label.result,'a / b') + self.assertEqual(label.result, 'a / b') def test_call_no_arg(self): label = self.perform_labeling_on_expression('range()') - self.assertEqual(label.result,'range()') - + self.assertEqual(label.result, 'range()') def test_call_single_arg(self): label = self.perform_labeling_on_expression('range(5)') - self.assertEqual(label.result,'range(5)') + self.assertEqual(label.result, 'range(5)') def test_call_multi_arg(self): - label = self.perform_labeling_on_expression('range(1,5)') - self.assertEqual(label.result,'range(1, 5)') + label = self.perform_labeling_on_expression('range(1, 5)') + self.assertEqual(label.result, 'range(1, 5)') def test_tuple_one_element(self): label = self.perform_labeling_on_expression('(1)') - self.assertEqual(label.result,'1') + self.assertEqual(label.result, '1') def test_tuple_two_elements(self): - label = self.perform_labeling_on_expression('(1,2)') - self.assertEqual(label.result,'(1, 2)') + label = self.perform_labeling_on_expression('(1, 2)') + self.assertEqual(label.result, '(1, 2)') def test_empty_tuple(self): label = self.perform_labeling_on_expression('()') - self.assertEqual(label.result,'()') + self.assertEqual(label.result, '()') def test_empty_list(self): label = self.perform_labeling_on_expression('[]') - self.assertEqual(label.result,'[]') + self.assertEqual(label.result, '[]') def test_list_one_element(self): label = self.perform_labeling_on_expression('[1]') - self.assertEqual(label.result,'[1]') + self.assertEqual(label.result, '[1]') def test_list_two_elements(self): - label = self.perform_labeling_on_expression('[1,2]') - self.assertEqual(label.result,'[1, 2]') - - - - + label = self.perform_labeling_on_expression('[1, 2]') + self.assertEqual(label.result, '[1, 2]') diff --git a/tests/nested_functions_test.py b/tests/nested_functions_test.py index 7147bbab..4d5d923c 100644 --- a/tests/nested_functions_test.py +++ 
b/tests/nested_functions_test.py @@ -1,7 +1,11 @@ import os.path from .base_test_case import BaseTestCase -from pyt.project_handler import get_directory_modules, get_modules_and_packages + +from pyt.core.project_handler import ( + get_directory_modules, + get_modules_and_packages +) class NestedTest(BaseTestCase): diff --git a/tests/project_handler_test.py b/tests/project_handler_test.py index a5105dc2..b6657cd5 100644 --- a/tests/project_handler_test.py +++ b/tests/project_handler_test.py @@ -1,13 +1,13 @@ import os import unittest -from pprint import pprint -from pyt.project_handler import ( +from pyt.core.project_handler import ( get_modules, get_modules_and_packages, is_python_file ) + class ProjectHandlerTest(unittest.TestCase): """Tests for the project handler.""" @@ -28,12 +28,12 @@ def test_get_modules(self): modules = get_modules(project_folder) app_path = os.path.join(project_folder, 'app.py') - utils_path = os.path.join(project_folder,'utils.py') + utils_path = os.path.join(project_folder, 'utils.py') exceptions_path = os.path.join(project_folder, 'exceptions.py') some_path = os.path.join(project_folder, folder, 'some.py') indhold_path = os.path.join(project_folder, folder, directory, 'indhold.py') - relative_folder_name = '.' + folder + # relative_folder_name = '.' + folder app_name = project_namespace + '.' + 'app' utils_name = project_namespace + '.' + 'utils' exceptions_name = project_namespace + '.' 
+ 'exceptions' @@ -66,8 +66,8 @@ def test_get_modules_and_packages(self): folder_path = os.path.join(project_folder, folder) app_path = os.path.join(project_folder, 'app.py') exceptions_path = os.path.join(project_folder, 'exceptions.py') - utils_path = os.path.join(project_folder,'utils.py') - directory_path = os.path.join(project_folder, folder, directory) + utils_path = os.path.join(project_folder, 'utils.py') + # directory_path = os.path.join(project_folder, folder, directory) some_path = os.path.join(project_folder, folder, 'some.py') indhold_path = os.path.join(project_folder, folder, directory, 'indhold.py') @@ -75,15 +75,23 @@ def test_get_modules_and_packages(self): app_name = project_namespace + '.' + 'app' exceptions_name = project_namespace + '.' + 'exceptions' utils_name = project_namespace + '.' + 'utils' - relative_directory_name = '.' + folder + '.' + directory + # relative_directory_name = '.' + folder + '.' + directory some_name = project_namespace + '.' + folder + '.some' indhold_name = project_namespace + '.' + folder + '.' 
+ directory + '.indhold' - folder_tuple = (relative_folder_name[1:], folder_path, relative_folder_name) + folder_tuple = ( + relative_folder_name[1:], + folder_path, + relative_folder_name + ) app_tuple = (app_name, app_path) exceptions_tuple = (exceptions_name, exceptions_path) utils_tuple = (utils_name, utils_path) - directory_tuple = (relative_directory_name[1:], directory_path, relative_directory_name) + # directory_tuple = ( + # relative_directory_name[1:], + # directory_path, + # relative_directory_name + # ) some_tuple = (some_name, some_path) indhold_tuple = (indhold_name, indhold_path) diff --git a/tests/vars_visitor_test.py b/tests/vars_visitor_test.py index 4f1c7c1e..09640434 100644 --- a/tests/vars_visitor_test.py +++ b/tests/vars_visitor_test.py @@ -1,7 +1,8 @@ import ast import unittest -from pyt.vars_visitor import VarsVisitor + +from pyt.helper_visitors import VarsVisitor class VarsVisitorTestCase(unittest.TestCase): diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index b405383b..ed2d82cd 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -4,9 +4,10 @@ from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse -from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import is_flask_route_function -from pyt.project_handler import get_directory_modules, get_modules +from pyt.core.project_handler import ( + get_directory_modules, + get_modules +) from pyt.usage import ( default_blackbox_mapping_file, default_trigger_word_file @@ -15,6 +16,10 @@ find_vulnerabilities, UImode ) +from pyt.web_frameworks import ( + FrameworkAdaptor, + is_flask_route_function +) class EngineTest(BaseTestCase): diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 649a0c28..4fda2076 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ 
-4,13 +4,7 @@ from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse -from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import ( - is_django_view_function, - is_flask_route_function, - is_function -) -from pyt.node_types import Node +from pyt.core.node_types import Node from pyt.usage import ( default_blackbox_mapping_file, default_trigger_word_file @@ -21,6 +15,12 @@ UImode, vulnerabilities ) +from pyt.web_frameworks import ( + FrameworkAdaptor, + is_django_view_function, + is_flask_route_function, + is_function +) class EngineTest(BaseTestCase): From 151e59fce07f6569b1f226f6b9d8ba4c7590d19e Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 13:27:29 -0700 Subject: [PATCH 035/291] Organize tests into different folders --- tests/analysis/analysis_base_test_case.py | 1 + tests/cfg/__init__.py | 0 tests/{ => cfg}/cfg_test.py | 2 +- tests/{ => cfg}/import_test.py | 2 +- tests/{ => cfg}/nested_functions_test.py | 2 +- tests/core/__init__.py | 0 tests/{ => core}/project_handler_test.py | 0 tests/helper_visitors/__init__.py | 0 tests/{ => helper_visitors}/label_visitor_test.py | 0 tests/{ => helper_visitors}/vars_visitor_test.py | 1 - tests/vulnerabilities/__init__.py | 0 .../{ => vulnerabilities}/vulnerabilities_across_files_test.py | 2 +- tests/{ => vulnerabilities}/vulnerabilities_test.py | 2 +- tests/web_frameworks/__init__.py | 0 tests/{ => web_frameworks}/framework_helper_test.py | 2 +- 15 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 tests/cfg/__init__.py rename tests/{ => cfg}/cfg_test.py (99%) rename tests/{ => cfg}/import_test.py (99%) rename tests/{ => cfg}/nested_functions_test.py (97%) create mode 100644 tests/core/__init__.py rename tests/{ => core}/project_handler_test.py (100%) create mode 100644 tests/helper_visitors/__init__.py rename tests/{ => helper_visitors}/label_visitor_test.py (100%) rename tests/{ => 
helper_visitors}/vars_visitor_test.py (99%) create mode 100644 tests/vulnerabilities/__init__.py rename tests/{ => vulnerabilities}/vulnerabilities_across_files_test.py (99%) rename tests/{ => vulnerabilities}/vulnerabilities_test.py (99%) create mode 100644 tests/web_frameworks/__init__.py rename tests/{ => web_frameworks}/framework_helper_test.py (98%) diff --git a/tests/analysis/analysis_base_test_case.py b/tests/analysis/analysis_base_test_case.py index 37d8e6f6..a6252c01 100644 --- a/tests/analysis/analysis_base_test_case.py +++ b/tests/analysis/analysis_base_test_case.py @@ -1,6 +1,7 @@ from collections import namedtuple from ..base_test_case import BaseTestCase + from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import FixedPointAnalysis from pyt.analysis.lattice import Lattice diff --git a/tests/cfg/__init__.py b/tests/cfg/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cfg_test.py b/tests/cfg/cfg_test.py similarity index 99% rename from tests/cfg_test.py rename to tests/cfg/cfg_test.py index 0f9ba2d3..e9a1f489 100644 --- a/tests/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -1,4 +1,4 @@ -from .base_test_case import BaseTestCase +from ..base_test_case import BaseTestCase from pyt.core.node_types import ( EntryOrExitNode, diff --git a/tests/import_test.py b/tests/cfg/import_test.py similarity index 99% rename from tests/import_test.py rename to tests/cfg/import_test.py index 6c2320cc..842fafa5 100644 --- a/tests/import_test.py +++ b/tests/cfg/import_test.py @@ -1,7 +1,7 @@ import ast import os -from .base_test_case import BaseTestCase +from ..base_test_case import BaseTestCase from pyt.core.ast_helper import get_call_names_as_string from pyt.core.project_handler import ( diff --git a/tests/nested_functions_test.py b/tests/cfg/nested_functions_test.py similarity index 97% rename from tests/nested_functions_test.py rename to tests/cfg/nested_functions_test.py index 4d5d923c..7a5f5772 
100644 --- a/tests/nested_functions_test.py +++ b/tests/cfg/nested_functions_test.py @@ -1,6 +1,6 @@ import os.path -from .base_test_case import BaseTestCase +from ..base_test_case import BaseTestCase from pyt.core.project_handler import ( get_directory_modules, diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/project_handler_test.py b/tests/core/project_handler_test.py similarity index 100% rename from tests/project_handler_test.py rename to tests/core/project_handler_test.py diff --git a/tests/helper_visitors/__init__.py b/tests/helper_visitors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/label_visitor_test.py b/tests/helper_visitors/label_visitor_test.py similarity index 100% rename from tests/label_visitor_test.py rename to tests/helper_visitors/label_visitor_test.py diff --git a/tests/vars_visitor_test.py b/tests/helper_visitors/vars_visitor_test.py similarity index 99% rename from tests/vars_visitor_test.py rename to tests/helper_visitors/vars_visitor_test.py index 09640434..849206b8 100644 --- a/tests/vars_visitor_test.py +++ b/tests/helper_visitors/vars_visitor_test.py @@ -1,7 +1,6 @@ import ast import unittest - from pyt.helper_visitors import VarsVisitor diff --git a/tests/vulnerabilities/__init__.py b/tests/vulnerabilities/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py similarity index 99% rename from tests/vulnerabilities_across_files_test.py rename to tests/vulnerabilities/vulnerabilities_across_files_test.py index ed2d82cd..026c38ef 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -1,6 +1,6 @@ import os -from .base_test_case import BaseTestCase +from ..base_test_case import BaseTestCase from pyt.analysis.constraint_table import 
initialize_constraint_table from pyt.analysis.fixed_point import analyse diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py similarity index 99% rename from tests/vulnerabilities_test.py rename to tests/vulnerabilities/vulnerabilities_test.py index 4fda2076..175746a9 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -1,6 +1,6 @@ import os -from .base_test_case import BaseTestCase +from ..base_test_case import BaseTestCase from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse diff --git a/tests/web_frameworks/__init__.py b/tests/web_frameworks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/framework_helper_test.py b/tests/web_frameworks/framework_helper_test.py similarity index 98% rename from tests/framework_helper_test.py rename to tests/web_frameworks/framework_helper_test.py index 07bd01c8..5ee8d21d 100644 --- a/tests/framework_helper_test.py +++ b/tests/web_frameworks/framework_helper_test.py @@ -1,4 +1,4 @@ -from .base_test_case import BaseTestCase +from ..base_test_case import BaseTestCase from pyt.web_frameworks import ( is_django_view_function, From d0e8ef0cfbb855ba44b359c11e1cc4fe7924fd26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 25 Apr 2018 00:21:54 +0300 Subject: [PATCH 036/291] Code edited edited argument, moved code place --- pyt/__main__.py | 50 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 9b197906..edb73b2b 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -142,9 +142,9 @@ def parse_args(args): '(only JSON-formatted files are accepted)', type=str, default=False) - parser.add_argument('-in', '--ignore-nosec', - help='Ignoring nosec commands', - action='http://github.com/store_true') + parser.add_argument('--ignore-nosec', 
dest='ignore_nosec', action='http://github.com/store_true', + help='do not skip lines with # nosec comments' + ) save_parser = subparsers.add_parser('save', help='Save menu.') save_parser.set_defaults(which='save') @@ -238,7 +238,28 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM + path = os.path.normpath(args.filepath) cfg_list = list() + if args.ignore_nosec: + nosec_lines = set() + else: + file = open(path, "r") + lines = file.readlines() + nosec_lines = set( + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' in line or '# nosec' in line) + vulnerabilities = find_vulnerabilities( + cfg_list, + analysis, + ui_mode, + VulnerabilityFiles( + args.blackbox_mapping_file, + args.trigger_word_file + ), + nosec_lines + ) + if args.git_repos: repos = get_repos(args.git_repos) for repo in repos: @@ -266,8 +287,6 @@ def main(command_line_args=sys.argv[1:]): ) exit() - path = os.path.normpath(args.filepath) - directory = None if args.project_root: directory = os.path.normpath(args.project_root) @@ -301,7 +320,6 @@ def main(command_line_args=sys.argv[1:]): analyse(cfg_list, analysis_type=analysis) - nosec_lines = set() vulnerabilities = find_vulnerabilities( cfg_list, analysis, @@ -313,26 +331,6 @@ def main(command_line_args=sys.argv[1:]): nosec_lines ) - if args.ignore_nosec: - nosec_lines = set() - else: - file = open(path, "r") - lines = file.readlines() - nosec_lines = set( - lineno for - (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) - vulnerabilities = find_vulnerabilities( - cfg_list, - analysis, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ), - nosec_lines - ) - if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline(vulnerabilities, args.baseline) From 8569aa467a6524f3ae11091c2783d3a9f8249f8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 25 Apr 2018 00:24:20 
+0300 Subject: [PATCH 037/291] unnecessary codes deleted --- pyt/vulnerabilities.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 5e9bd675..eeb9404a 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -187,8 +187,6 @@ def find_triggers( for node in nodes: if node.line_number not in nosec_lines: trigger_nodes.extend(iter(label_contains(node, trigger_words))) - else: - pass return trigger_nodes @@ -529,7 +527,6 @@ def find_vulnerabilities( with open(vulnerability_files.blackbox_mapping) as infile: blackbox_mapping = json.load(infile) for cfg in cfg_list: - find_vulnerabilities_in_cfg( cfg, definitions, From a76893fb8cbb4412ee1373b5b39b5be5bccb567b Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 24 Apr 2018 14:38:55 -0700 Subject: [PATCH 038/291] Add PRs Welcome badge --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 9bd41255..db509cf1 100644 --- a/README.rst +++ b/README.rst @@ -10,6 +10,9 @@ .. image:: https://badge.fury.io/py/python-taint.svg :target: https://badge.fury.io/py/python-taint +.. image:: https://img.shields.io/badge/PRs-welcome-ff69b4.svg + :target: https://github.com/python-security/pyt/issues?q=is%3Aopen+is%3Aissue+label%3Agood-first-issue + Python Taint ============ From dc1b21097d3d4f2f525bb8851476b2675a561e58 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 24 Apr 2018 14:45:48 -0700 Subject: [PATCH 039/291] Add Python 3.6 badge --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index db509cf1..65fd44e5 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,6 @@ +.. image:: https://img.shields.io/badge/python-v3.6-blue.svg + :target: https://pypi.org/project/python-taint/ + .. 
image:: https://travis-ci.org/python-security/pyt.svg?branch=master :target: https://travis-ci.org/python-security/pyt From f238575182b1efa85d1a8f52957526299c10796b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 15:07:21 -0700 Subject: [PATCH 040/291] [wip] trying to make deadcode in reaching_definitions_taint_test work --- tests/__main__.py | 2 +- .../reaching_definitions_taint_test.py | 10 +++-- tests/base_test_case.py | 40 +++---------------- tests/vulnerabilities/vulnerabilities_test.py | 13 ------ 4 files changed, 13 insertions(+), 52 deletions(-) diff --git a/tests/__main__.py b/tests/__main__.py index 61f69077..5356acd2 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -15,6 +15,6 @@ if result.wasSuccessful(): print('Success') exit(0) -else: +else: # pragma: no cover print('Failure') exit(1) diff --git a/tests/analysis/reaching_definitions_taint_test.py b/tests/analysis/reaching_definitions_taint_test.py index e2d437a8..03a38c4b 100644 --- a/tests/analysis/reaching_definitions_taint_test.py +++ b/tests/analysis/reaching_definitions_taint_test.py @@ -1,10 +1,12 @@ from .analysis_base_test_case import AnalysisBaseTestCase +from pyt.analysis.constraint_table import constraint_table + class ReachingDefinitionsTaintTest(AnalysisBaseTestCase): # Note: the numbers in the test represent the line numbers of the assignments in the program. 
def test_linear_program(self): - constraint_table = {} + # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/linear.py') EXPECTED = [ @@ -22,7 +24,7 @@ def test_linear_program(self): i = i + 1 def test_if_program(self): - constraint_table = {} + # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/if_program.py') EXPECTED = [ @@ -41,7 +43,7 @@ def test_if_program(self): i = i + 1 def test_example(self): - constraint_table = {} + # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/example.py') EXPECTED = [ @@ -82,7 +84,7 @@ def test_func_with_params(self): *self.constraints([2, 3, 4, 6, 7, 9], 10)], lattice) def test_while(self): - constraint_table = {} + # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/while.py') EXPECTED = [ diff --git a/tests/base_test_case.py b/tests/base_test_case.py index c0df17f2..b2b70dca 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -46,45 +46,15 @@ def assertInCfg(self, connections): "(%s <- %s)" % (sets, element) + " expected to be disconnected" ) - def assertConnected(self, node, successor): - """Asserts that a node is connected to its successor. - This means that node has successor in its outgoing and - successor has node in its ingoing.""" - - self.assertIn( - successor, - node.outgoing, - '\n%s was NOT found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']' - ) - - self.assertIn( - node, - successor.ingoing, - '\n%s was NOT found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']' - ) - - def assertNotConnected(self, node, successor): - """Asserts that a node is not connected to its successor. 
- This means that node does not the successor in its outgoing and - successor does not have the node in its ingoing.""" - - self.assertNotIn( - successor, - node.outgoing, - '\n%s was mistakenly found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']' - ) - self.assertNotIn( - node, - successor.ingoing, - '\n%s was mistakenly found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']' - ) - def assertLineNumber(self, node, line_number): + """Only used in cfg_test.""" self.assertEqual(node.line_number, line_number) def cfg_list_to_dict(self, list): """This method converts the CFG list to a dict, making it easier to find nodes to test. - This method assumes that no nodes in the code have the same label""" + This method assumes that no nodes in the code have the same label. + Only used in cfg_test. + """ return {x.label: x for x in list} def assert_length(self, _list, *, expected_length): @@ -107,12 +77,14 @@ def cfg_create_from_file( ) def string_compare_alpha(self, output, expected_string): + # Only used in vulnerability tests return ( [char for char in output if char.isalpha()] == [char for char in expected_string if char.isalpha()] ) def string_compare_alnum(self, output, expected_string): + # Only used in reaching_definitions_taint_test return ( [char for char in output if char.isalnum()] == [char for char in expected_string if char.isalnum()] diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 175746a9..7313a766 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -24,13 +24,6 @@ class EngineTest(BaseTestCase): - def run_empty(self): - return - - def get_lattice_elements(self, cfg_nodes): - """Dummy analysis method""" - return cfg_nodes - def test_parse(self): definitions = 
trigger_definitions_parser.parse( trigger_word_file=os.path.join( @@ -504,9 +497,6 @@ def test_XSS_variable_multiple_assign_result(self): class EngineDjangoTest(BaseTestCase): - def run_empty(self): - return - def run_analysis(self, path): self.cfg_create_from_file(path) cfg_list = [self.cfg] @@ -549,9 +539,6 @@ def test_django_view_param(self): class EngineEveryTest(BaseTestCase): - def run_empty(self): - return - def run_analysis(self, path): self.cfg_create_from_file(path) cfg_list = [self.cfg] From 36d0a3afd8959d6091dfcf09dd653281b404729b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 15:08:55 -0700 Subject: [PATCH 041/291] Add init file to vulnerability_definitions folder --- pyt/vulnerability_definitions/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pyt/vulnerability_definitions/__init__.py diff --git a/pyt/vulnerability_definitions/__init__.py b/pyt/vulnerability_definitions/__init__.py new file mode 100644 index 00000000..e69de29b From a017a29b1e7fa797575913a97d71689a2fa319aa Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 15:10:22 -0700 Subject: [PATCH 042/291] Bump version to 0.31 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e0ed08c5..e648647e 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.30' +VERSION = '0.31' setup( From a9029d9cf90435994d5811138a76645fc0b018a9 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 15:24:44 -0700 Subject: [PATCH 043/291] Bump to 0.32, include package_data from every package --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index e648647e..9a387648 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.31' +VERSION = '0.32' setup( @@ -11,7 +11,7 @@ version=VERSION, include_package_data=True, package_data={ - 'pyt': ['*.json', '*.pyt'], + '': 
['*.json', '*.pyt'], }, description='Find security vulnerabilities in Python web applications' ' using static analysis.', From bc8a28f9eb00bf8f8a46c49ee264db2a869e2b53 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 15:35:21 -0700 Subject: [PATCH 044/291] Bump to 0.33, see if package_data works at all --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9a387648..e1fcace3 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.32' +VERSION = '0.33' setup( @@ -11,7 +11,7 @@ version=VERSION, include_package_data=True, package_data={ - '': ['*.json', '*.pyt'], + 'pyt.vulnerability_definitions': ['*.json', '*.pyt'], }, description='Find security vulnerabilities in Python web applications' ' using static analysis.', From c5235fd402d3ad483ded8208ae487e29eb2b53ca Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 24 Apr 2018 16:06:59 -0700 Subject: [PATCH 045/291] Finally fix the missing files with a MANIFEST.in --- MANIFEST.in | 1 + pyt/vulnerability_definitions/__init__.py | 0 setup.py | 5 +---- 3 files changed, 2 insertions(+), 4 deletions(-) create mode 100644 MANIFEST.in delete mode 100644 pyt/vulnerability_definitions/__init__.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..d1c8d23a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +graft pyt/vulnerability_definitions diff --git a/pyt/vulnerability_definitions/__init__.py b/pyt/vulnerability_definitions/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/setup.py b/setup.py index e1fcace3..39aa9440 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.33' +VERSION = '0.34' setup( @@ -10,9 +10,6 @@ packages=find_packages(exclude=(['tests*'])), version=VERSION, include_package_data=True, - package_data={ - 'pyt.vulnerability_definitions': ['*.json', '*.pyt'], - }, description='Find security vulnerabilities in 
Python web applications' ' using static analysis.', long_description="Check out PyT on `GitHub `_!", From 30019c56e937c6806570d1d7ddee6145363a4d58 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 24 Apr 2018 16:14:41 -0700 Subject: [PATCH 046/291] Release 0.34, added thanks to @Ekultek --- CHANGELOG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18e840a6..564342ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,8 +22,8 @@ If you love PyT, please star our project on GitHub to show your support! :star: [@xxxx]: https://github.com/xxxx --> -# Unreleased -##### April 18, 2018 +# 0.34 +##### April 24, 2018 #### :tada: New Features @@ -42,6 +42,7 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :bug: Bugfixes * Fixed a bug where `visit_Raise` raised a `TypeError` ([#117], thanks [@lFatty]) * Fixed an infinite loop bug that was caused while handling certain loops ([#118]) +* Fixed a bug where we were not including `pyt/vulnerability_definitions` files ([#122], thanks [@Ekultek]) #### :snake: Miscellaneous @@ -55,3 +56,5 @@ If you love PyT, please star our project on GitHub to show your support! :star: [#111]: https://github.com/python-security/pyt/pull/111 [#110]: https://github.com/python-security/pyt/pull/110 [@lfatty]: https://github.com/lfatty +[#122]: https://github.com/python-security/pyt/issues/122 +[@Ekultek]: https://github.com/Ekultek From 20b62c39e11dd1f2ee495bdd662c3d64b30044e5 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 25 Apr 2018 09:31:41 -0700 Subject: [PATCH 047/291] Move 3.6 badge to the right --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 65fd44e5..5e259000 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,3 @@ -.. image:: https://img.shields.io/badge/python-v3.6-blue.svg - :target: https://pypi.org/project/python-taint/ - .. 
image:: https://travis-ci.org/python-security/pyt.svg?branch=master :target: https://travis-ci.org/python-security/pyt @@ -16,6 +13,9 @@ .. image:: https://img.shields.io/badge/PRs-welcome-ff69b4.svg :target: https://github.com/python-security/pyt/issues?q=is%3Aopen+is%3Aissue+label%3Agood-first-issue +.. image:: https://img.shields.io/badge/python-v3.6-blue.svg + :target: https://pypi.org/project/python-taint/ + Python Taint ============ From e8cddd06515bb24b46c8d28636cafe8a43e1f885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 01:12:43 +0300 Subject: [PATCH 048/291] passed nosec_lines on analyse_repo unnecessary codes deleted, passed nosec_lines on analyse_repo --- pyt/__main__.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index edb73b2b..8ce6cc67 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -195,7 +195,7 @@ def parse_args(args): return parser.parse_args(args) -def analyse_repo(args, github_repo, analysis_type, ui_mode): +def analyse_repo(args, github_repo, analysis_type, ui_mode, nosec_lines): cfg_list = list() directory = os.path.dirname(github_repo.path) project_modules = get_modules(directory) @@ -218,7 +218,8 @@ def analyse_repo(args, github_repo, analysis_type, ui_mode): VulnerabilityFiles( args.blackbox_mapping_file, args.trigger_word_file - ) + ), + nosec_lines ) return vulnerabilities @@ -248,23 +249,13 @@ def main(command_line_args=sys.argv[1:]): nosec_lines = set( lineno for (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) - vulnerabilities = find_vulnerabilities( - cfg_list, - analysis, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ), - nosec_lines - ) + if '#nosec' in line or '# nosec' in line) if args.git_repos: repos = get_repos(args.git_repos) for repo in repos: repo.clone() - vulnerabilities = analyse_repo(args, repo, analysis, ui_mode) 
+ vulnerabilities = analyse_repo(args, repo, analysis, ui_mode, nosec_lines) if args.json: json.report(vulnerabilities, sys.stdout) else: From 448e987fa9f87fec790bb7f425017351ca6c53e7 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 25 Apr 2018 18:44:53 -0700 Subject: [PATCH 049/291] Fixed deadcode in reaching_definitions_taint_test --- .coveragerc | 4 +++- tests/analysis/analysis_base_test_case.py | 11 ++++++++++- tests/analysis/reaching_definitions_taint_test.py | 4 ---- tox.ini | 3 ++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.coveragerc b/.coveragerc index 2678c03a..ebd6df4f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,7 +9,9 @@ exclude_lines = raise NotImplementedError [run] -source = ./pyt +source = + ./pyt + ./tests omit = pyt/__main__.py pyt/usage.py diff --git a/tests/analysis/analysis_base_test_case.py b/tests/analysis/analysis_base_test_case.py index a6252c01..76037c7e 100644 --- a/tests/analysis/analysis_base_test_case.py +++ b/tests/analysis/analysis_base_test_case.py @@ -2,11 +2,19 @@ from ..base_test_case import BaseTestCase -from pyt.analysis.constraint_table import initialize_constraint_table +from pyt.analysis.constraint_table import ( + constraint_table, + initialize_constraint_table +) from pyt.analysis.fixed_point import FixedPointAnalysis from pyt.analysis.lattice import Lattice +def clear_constraint_table(): + for key in list(constraint_table): + del constraint_table[key] + + class AnalysisBaseTestCase(BaseTestCase): connection = namedtuple( 'connection', @@ -55,6 +63,7 @@ def constraints(self, list_of_constraints, node_number): def run_analysis(self, path): self.cfg_create_from_file(path) + clear_constraint_table() initialize_constraint_table([self.cfg]) self.analysis = FixedPointAnalysis(self.cfg) self.analysis.fixpoint_runner() diff --git a/tests/analysis/reaching_definitions_taint_test.py b/tests/analysis/reaching_definitions_taint_test.py index 03a38c4b..20f50b5a 100644 --- 
a/tests/analysis/reaching_definitions_taint_test.py +++ b/tests/analysis/reaching_definitions_taint_test.py @@ -6,7 +6,6 @@ class ReachingDefinitionsTaintTest(AnalysisBaseTestCase): # Note: the numbers in the test represent the line numbers of the assignments in the program. def test_linear_program(self): - # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/linear.py') EXPECTED = [ @@ -24,7 +23,6 @@ def test_linear_program(self): i = i + 1 def test_if_program(self): - # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/if_program.py') EXPECTED = [ @@ -43,7 +41,6 @@ def test_if_program(self): i = i + 1 def test_example(self): - # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/example.py') EXPECTED = [ @@ -84,7 +81,6 @@ def test_func_with_params(self): *self.constraints([2, 3, 4, 6, 7, 9], 10)], lattice) def test_while(self): - # constraint_table = {} lattice = self.run_analysis('examples/example_inputs/while.py') EXPECTED = [ diff --git a/tox.ini b/tox.ini index 933a1460..f85b156c 100644 --- a/tox.ini +++ b/tox.ini @@ -7,5 +7,6 @@ deps = -rrequirements-dev.txt commands = coverage erase coverage run tests - coverage report --show-missing --fail-under 88 + coverage report --include=tests/* --fail-under 100 + coverage report --include=pyt/* --fail-under 88 pre-commit run From de48495322e70f00d94dc98d9e8f74f0d654f73b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 25 Apr 2018 18:48:10 -0700 Subject: [PATCH 050/291] [coveragerc] Add show_missing = True --- .coveragerc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.coveragerc b/.coveragerc index ebd6df4f..bfd2571f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,6 @@ [report] +show_missing = True + exclude_lines = def valid_date def __repr__ From 0f5fb431c0ac6d804d54439e266da4c70addae84 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 25 Apr 2018 19:07:33 -0700 Subject: [PATCH 051/291] fix silly usage and main -r things --- 
pyt/__main__.py | 12 ++++++------ pyt/usage.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 7831a177..c1bc68ef 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -5,23 +5,23 @@ from .analysis.constraint_table import initialize_constraint_table from .analysis.fixed_point import analyse -from .ast_helper import generate_ast from .cfg import make_cfg +from .core.ast_helper import generate_ast +from .core.project_handler import ( + get_directory_modules, + get_modules +) from .formatters import ( json, text ) -from .project_handler import ( - get_directory_modules, - get_modules -) from .usage import parse_args from .vulnerabilities import ( find_vulnerabilities, get_vulnerabilities_not_in_baseline, UImode ) -from pyt.web_frameworks import ( +from .web_frameworks import ( FrameworkAdaptor, is_django_view_function, is_flask_route_function, diff --git a/pyt/usage.py b/pyt/usage.py index 37fa3bc2..d2cf2d98 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -96,10 +96,10 @@ def _add_print_group(parser): def _check_required_and_mutually_exclusive_args(parser, args): - if args.filepath is None and args.git_repos is None: - parser.error('one of the arguments -f/--filepath -gr/--git-repos is required') - if args.filepath and args.git_repos: - parser.error('argument -f/--filepath: not allowed with argument -gr/--git-repos') + if args.filepath is None and args.root_directory is None: + parser.error('one of the arguments -f/--filepath -r/--root-directory is required') + if args.filepath and args.root_directory: + parser.error('argument -f/--filepath: not allowed with argument -r/--root-directory') if args.trim_reassigned_in and args.interactive: parser.error('argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in') From 8c2dffa6c0219cefa6ec9ae1ff9762042eb0d0f2 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 25 Apr 2018 19:11:10 -0700 Subject: [PATCH 052/291] Added: Please see the 
change log For a look at recent changes, please see the `change log`_. --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index 5e259000..84f04a08 100644 --- a/README.rst +++ b/README.rst @@ -41,6 +41,10 @@ Features * A lot of customisation possible +For a look at recent changes, please see the `change log`_. + +.. _change_log: https://github.com/python-security/pyt/blob/master/CHANGELOG.md + Example usage and output: .. image:: https://raw.githubusercontent.com/KevinHock/rtdpyt/master/readme_static_files/pyt_example.png From 846347b1827a1cbdcdda4f974988e40a52ea25e4 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 25 Apr 2018 19:11:55 -0700 Subject: [PATCH 053/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 84f04a08..2f8e9644 100644 --- a/README.rst +++ b/README.rst @@ -41,7 +41,7 @@ Features * A lot of customisation possible -For a look at recent changes, please see the `change log`_. +For a look at recent changes, please see the `change log`_ .. _change_log: https://github.com/python-security/pyt/blob/master/CHANGELOG.md From 3f9d3efe00f50e8d9283b1ae7c01feb7f8f843cc Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 25 Apr 2018 19:14:31 -0700 Subject: [PATCH 054/291] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2f8e9644..65084da8 100644 --- a/README.rst +++ b/README.rst @@ -41,9 +41,9 @@ Features * A lot of customisation possible -For a look at recent changes, please see the `change log`_ +For a look at recent changes, please see the `changelog`_. -.. _change_log: https://github.com/python-security/pyt/blob/master/CHANGELOG.md +.. 
_changelog: https://github.com/python-security/pyt/blob/master/CHANGELOG.md Example usage and output: From 3d5867b49f88698430318e79e88803bf14ba234c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 18:40:20 +0300 Subject: [PATCH 055/291] Added nosec_lines Added empty nosec_lines for tests --- tests/vulnerabilities_test.py | 290 +++++++++++++++------------------- 1 file changed, 130 insertions(+), 160 deletions(-) diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index f3d77279..7ec1ae1f 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -15,15 +15,14 @@ from pyt.constraint_table import initialize_constraint_table from pyt.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import ( +from pyt.framework_helper import( is_django_view_function, - is_flask_route_function, - is_function + is_flask_route_function ) from pyt.node_types import Node from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis - +nosec_lines = set() class EngineTest(BaseTestCase): def run_empty(self): return @@ -84,7 +83,7 @@ def test_label_contains(self): self.assert_length(l, expected_length=2) def test_find_triggers(self): - self.cfg_create_from_file('examples/vulnerable_code/XSS.py') + self.cfg_create_from_file('example/vulnerable_code/XSS.py') cfg_list = [self.cfg] @@ -93,27 +92,29 @@ def test_find_triggers(self): XSS1 = cfg_list[1] trigger_words = [('get', [])] - l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words) + l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words, nosec_lines) self.assert_length(l, expected_length=1) + def test_find_sanitiser_nodes(self): cfg_node = Node(None, None, line_number=None, path=None) - sanitiser_tuple = vulnerabilities.Sanitiser('escape', cfg_node) + sanitiser_tuple = vulnerabilities.Sanitiser('escape', cfg_node) sanitiser = 'escape' result = 
list(vulnerabilities.find_sanitiser_nodes(sanitiser, [sanitiser_tuple])) self.assert_length(result, expected_length=1) self.assertEqual(result[0], cfg_node) + def test_build_sanitiser_node_dict(self): - self.cfg_create_from_file('examples/vulnerable_code/XSS_sanitised.py') + self.cfg_create_from_file('example/vulnerable_code/XSS_sanitised.py') cfg_list = [self.cfg] FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) cfg = cfg_list[1] - cfg_node = Node(None, None, line_number=None, path=None) + cfg_node = Node(None, None, line_number=None, path=None) sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node)] sanitiser_dict = vulnerabilities.build_sanitiser_node_dict(cfg, sinks_in_file) @@ -124,6 +125,7 @@ def test_build_sanitiser_node_dict(self): def run_analysis(self, path): self.cfg_create_from_file(path) + cfg_list = [self.cfg] FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) @@ -138,39 +140,40 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, default_trigger_word_file - ) + ), + nosec_lines ) def test_find_vulnerabilities_assign_other_var(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_assign_to_other_var.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_assign_to_other_var.py') self.assert_length(vulnerabilities, expected_length=1) def test_find_vulnerabilities_inter_command_injection(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/inter_command_injection.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/inter_command_injection.py') self.assert_length(vulnerabilities, expected_length=1) def test_find_vulnerabilities_inter_command_injection_2(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/inter_command_injection_2.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/inter_command_injection_2.py') self.assert_length(vulnerabilities, expected_length=1) def 
test_XSS_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS.py + File: example/vulnerable_code/XSS.py > User input at line 6, trigger word "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/XSS.py + File: example/vulnerable_code/XSS.py > Line 6: param = ~call_1 - File: examples/vulnerable_code/XSS.py + File: example/vulnerable_code/XSS.py > Line 9: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS.py + File: example/vulnerable_code/XSS.py > Line 9: resp = ~call_3 - File: examples/vulnerable_code/XSS.py + File: example/vulnerable_code/XSS.py > Line 10: ret_XSS1 = resp - File: examples/vulnerable_code/XSS.py + File: example/vulnerable_code/XSS.py > reaches line 9, trigger word "replace(": ~call_4 = ret_html.replace('{{ param }}', param) """ @@ -178,17 +181,17 @@ def test_XSS_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_command_injection_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/command_injection.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/command_injection.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/command_injection.py + File: example/vulnerable_code/command_injection.py > User input at line 15, trigger word "form[": param = request.form['suggestion'] Reassigned in: - File: examples/vulnerable_code/command_injection.py + File: example/vulnerable_code/command_injection.py > Line 16: command = 'echo ' + param 
+ ' >> ' + 'menu.txt' - File: examples/vulnerable_code/command_injection.py + File: example/vulnerable_code/command_injection.py > reaches line 18, trigger word "subprocess.call(": ~call_1 = ret_subprocess.call(command, shell=True) """ @@ -196,37 +199,37 @@ def test_command_injection_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_path_traversal_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/path_traversal.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > User input at line 15, trigger word "request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 15: image_name = ~call_1 - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 6: save_2_image_name = image_name - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 10: save_3_image_name = image_name - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 10: image_name = save_3_image_name - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 19: temp_2_other_arg = image_name - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 6: other_arg = temp_2_other_arg - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg - File: 
examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 8: ret_outer = outer_ret_val - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 6: image_name = save_2_image_name - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 19: ~call_2 = ret_outer - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > Line 19: foo = ~call_2 - File: examples/vulnerable_code/path_traversal.py + File: example/vulnerable_code/path_traversal.py > reaches line 20, trigger word "send_file(": ~call_4 = ret_send_file(foo) """ @@ -234,37 +237,37 @@ def test_path_traversal_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_ensure_saved_scope(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/ensure_saved_scope.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/ensure_saved_scope.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > User input at line 15, trigger word "request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 15: image_name = ~call_1 - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 6: save_2_image_name = image_name - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 10: save_3_image_name = image_name - File: examples/vulnerable_code/ensure_saved_scope.py + File: 
example/vulnerable_code/ensure_saved_scope.py > Line 10: image_name = save_3_image_name - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 19: temp_2_other_arg = image_name - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 6: other_arg = temp_2_other_arg - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 8: ret_outer = outer_ret_val - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 6: image_name = save_2_image_name - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 19: ~call_2 = ret_outer - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > Line 19: foo = ~call_2 - File: examples/vulnerable_code/ensure_saved_scope.py + File: example/vulnerable_code/ensure_saved_scope.py > reaches line 20, trigger word "send_file(": ~call_4 = ret_send_file(image_name) """ @@ -272,25 +275,25 @@ def test_ensure_saved_scope(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_path_traversal_sanitised_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/path_traversal_sanitised.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > User input at 
line 8, trigger word "request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > Line 8: image_name = ~call_1 - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > Line 10: ~call_2 = ret_image_name.replace('..', '') - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > Line 10: image_name = ~call_2 - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > Line 12: ~call_4 = ret_os.path.join(~call_5, image_name) - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > Line 12: ret_cat_picture = ~call_3 - File: examples/vulnerable_code/path_traversal_sanitised.py + File: example/vulnerable_code/path_traversal_sanitised.py > reaches line 12, trigger word "send_file(": ~call_3 = ret_send_file(~call_4) This vulnerability is sanitised by: Label: ~call_2 = ret_image_name.replace('..', '') @@ -299,21 +302,21 @@ def test_path_traversal_sanitised_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_path_traversal_sanitised_2_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised_2.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/path_traversal_sanitised_2.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/path_traversal_sanitised_2.py + File: example/vulnerable_code/path_traversal_sanitised_2.py > User input at line 8, trigger word "request.args.get(": ~call_1 = ret_request.args.get('image_name') 
Reassigned in: - File: examples/vulnerable_code/path_traversal_sanitised_2.py + File: example/vulnerable_code/path_traversal_sanitised_2.py > Line 8: image_name = ~call_1 - File: examples/vulnerable_code/path_traversal_sanitised_2.py + File: example/vulnerable_code/path_traversal_sanitised_2.py > Line 12: ~call_3 = ret_os.path.join(~call_4, image_name) - File: examples/vulnerable_code/path_traversal_sanitised_2.py + File: example/vulnerable_code/path_traversal_sanitised_2.py > Line 12: ret_cat_picture = ~call_2 - File: examples/vulnerable_code/path_traversal_sanitised_2.py + File: example/vulnerable_code/path_traversal_sanitised_2.py > reaches line 12, trigger word "send_file(": ~call_2 = ret_send_file(~call_3) This vulnerability is potentially sanitised by: Label: if '..' in image_name: @@ -322,19 +325,19 @@ def test_path_traversal_sanitised_2_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_sql_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/sql/sqli.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/sql/sqli.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/sql/sqli.py + File: example/vulnerable_code/sql/sqli.py > User input at line 26, trigger word "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/sql/sqli.py + File: example/vulnerable_code/sql/sqli.py > Line 26: param = ~call_1 - File: examples/vulnerable_code/sql/sqli.py + File: example/vulnerable_code/sql/sqli.py > Line 27: result = ~call_2 - File: examples/vulnerable_code/sql/sqli.py + File: example/vulnerable_code/sql/sqli.py > reaches line 27, trigger word "execute(": ~call_2 = ret_db.engine.execute(param) """ @@ -342,21 +345,21 @@ def test_sql_result(self): 
self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_XSS_form_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_form.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_form.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS_form.py + File: example/vulnerable_code/XSS_form.py > User input at line 14, trigger word "form[": data = request.form['my_text'] Reassigned in: - File: examples/vulnerable_code/XSS_form.py + File: example/vulnerable_code/XSS_form.py > Line 15: ~call_1 = ret_make_response(~call_2) - File: examples/vulnerable_code/XSS_form.py + File: example/vulnerable_code/XSS_form.py > Line 15: resp = ~call_1 - File: examples/vulnerable_code/XSS_form.py + File: example/vulnerable_code/XSS_form.py > Line 17: ret_example2_action = resp - File: examples/vulnerable_code/XSS_form.py + File: example/vulnerable_code/XSS_form.py > reaches line 15, trigger word "replace(": ~call_2 = ret_html1.replace('{{ data }}', data) """ @@ -364,23 +367,23 @@ def test_XSS_form_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_XSS_url_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_url.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_url.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS_url.py + File: example/vulnerable_code/XSS_url.py > User input at line 4, trigger word "Framework function URL parameter": url Reassigned in: - File: examples/vulnerable_code/XSS_url.py + File: example/vulnerable_code/XSS_url.py > Line 6: param = url - File: 
examples/vulnerable_code/XSS_url.py + File: example/vulnerable_code/XSS_url.py > Line 9: ~call_2 = ret_make_response(~call_3) - File: examples/vulnerable_code/XSS_url.py + File: example/vulnerable_code/XSS_url.py > Line 9: resp = ~call_2 - File: examples/vulnerable_code/XSS_url.py + File: example/vulnerable_code/XSS_url.py > Line 10: ret_XSS1 = resp - File: examples/vulnerable_code/XSS_url.py + File: example/vulnerable_code/XSS_url.py > reaches line 9, trigger word "replace(": ~call_3 = ret_html.replace('{{ param }}', param) """ @@ -388,29 +391,29 @@ def test_XSS_url_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_XSS_no_vuln_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_no_vuln.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_no_vuln.py') self.assert_length(vulnerabilities, expected_length=0) def test_XSS_reassign_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_reassign.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_reassign.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS_reassign.py + File: example/vulnerable_code/XSS_reassign.py > User input at line 6, trigger word "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/XSS_reassign.py + File: example/vulnerable_code/XSS_reassign.py > Line 6: param = ~call_1 - File: examples/vulnerable_code/XSS_reassign.py + File: example/vulnerable_code/XSS_reassign.py > Line 8: param = param + '' - File: examples/vulnerable_code/XSS_reassign.py + File: example/vulnerable_code/XSS_reassign.py > Line 11: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS_reassign.py + File: 
example/vulnerable_code/XSS_reassign.py > Line 11: resp = ~call_3 - File: examples/vulnerable_code/XSS_reassign.py + File: example/vulnerable_code/XSS_reassign.py > Line 12: ret_XSS1 = resp - File: examples/vulnerable_code/XSS_reassign.py + File: example/vulnerable_code/XSS_reassign.py > reaches line 11, trigger word "replace(": ~call_4 = ret_html.replace('{{ param }}', param) """ @@ -418,27 +421,27 @@ def test_XSS_reassign_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_XSS_sanitised_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_sanitised.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_sanitised.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > User input at line 7, trigger word "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > Line 7: param = ~call_1 - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > Line 9: ~call_2 = ret_Markup.escape(param) - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > Line 9: param = ~call_2 - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > Line 12: ~call_4 = ret_make_response(~call_5) - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > Line 12: resp = ~call_4 - File: examples/vulnerable_code/XSS_sanitised.py + File: example/vulnerable_code/XSS_sanitised.py > Line 13: ret_XSS1 = resp - File: examples/vulnerable_code/XSS_sanitised.py + File: 
example/vulnerable_code/XSS_sanitised.py > reaches line 12, trigger word "replace(": ~call_5 = ret_html.replace('{{ param }}', param) This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) @@ -447,29 +450,29 @@ def test_XSS_sanitised_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_XSS_variable_assign_no_vuln_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_assign_no_vuln.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_variable_assign_no_vuln.py') self.assert_length(vulnerabilities, expected_length=0) def test_XSS_variable_assign_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_assign.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_variable_assign.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > User input at line 6, trigger word "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > Line 6: param = ~call_1 - File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > Line 8: other_var = param + '' - File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > Line 11: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > Line 11: resp = ~call_3 - File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > Line 12: ret_XSS1 = resp - 
File: examples/vulnerable_code/XSS_variable_assign.py + File: example/vulnerable_code/XSS_variable_assign.py > reaches line 11, trigger word "replace(": ~call_4 = ret_html.replace('{{ param }}', other_var) """ @@ -477,29 +480,29 @@ def test_XSS_variable_assign_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) def test_XSS_variable_multiple_assign_result(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_multiple_assign.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_variable_multiple_assign.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > User input at line 6, trigger word "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 6: param = ~call_1 - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 8: other_var = param + '' - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 10: not_the_same_var = '' + other_var - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 12: another_one = not_the_same_var + '' - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 15: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: 
example/vulnerable_code/XSS_variable_multiple_assign.py > Line 15: resp = ~call_3 - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 17: ret_XSS1 = resp - File: examples/vulnerable_code/XSS_variable_multiple_assign.py + File: example/vulnerable_code/XSS_variable_multiple_assign.py > reaches line 15, trigger word "replace(": ~call_4 = ret_html.replace('{{ param }}', another_one) """ @@ -533,57 +536,24 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, trigger_word_file - ) + ), + nosec_lines ) def test_django_view_param(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code/django_XSS.py') + vulnerabilities = self.run_analysis('example/vulnerable_code/django_XSS.py') self.assert_length(vulnerabilities, expected_length=2) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code/django_XSS.py + File: example/vulnerable_code/django_XSS.py > User input at line 4, trigger word "Framework function URL parameter": param Reassigned in: - File: examples/vulnerable_code/django_XSS.py + File: example/vulnerable_code/django_XSS.py > Line 5: ret_xss1 = ~call_1 - File: examples/vulnerable_code/django_XSS.py + File: example/vulnerable_code/django_XSS.py > reaches line 5, trigger word "render(": ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - -class EngineEveryTest(BaseTestCase): - def run_empty(self): - return - - def run_analysis(self, path): - self.cfg_create_from_file(path) - cfg_list = [self.cfg] - - FrameworkAdaptor(cfg_list, [], [], is_function) - initialize_constraint_table(cfg_list) - - analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - - trigger_word_file = os.path.join( - 'pyt', - 'vulnerability_definitions', - 
'all_trigger_words.pyt' - ) - - return vulnerabilities.find_vulnerabilities( - cfg_list, - ReachingDefinitionsTaintAnalysis, - UImode.NORMAL, - VulnerabilityFiles( - default_blackbox_mapping_file, - trigger_word_file - ) - ) - - def test_self_is_not_tainted(self): - vulnerabilities = self.run_analysis('examples/example_inputs/def_with_self_as_first_arg.py') - self.assert_length(vulnerabilities, expected_length=0) From 1502ee73e81ba6f981ca39658d6f93863091727c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 18:45:03 +0300 Subject: [PATCH 056/291] added empty nosec lines for tests --- tests/vulnerabilities_test.py | 664 ++++++++++------------------------ 1 file changed, 200 insertions(+), 464 deletions(-) diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 7ec1ae1f..b30d415b 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -1,11 +1,6 @@ import os from .base_test_case import BaseTestCase - -from pyt import ( - trigger_definitions_parser, - vulnerabilities -) from pyt.argument_helpers import ( default_blackbox_mapping_file, default_trigger_word_file, @@ -15,125 +10,30 @@ from pyt.constraint_table import initialize_constraint_table from pyt.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import( - is_django_view_function, - is_flask_route_function -) -from pyt.node_types import Node +from pyt.framework_helper import is_flask_route_function +from pyt.project_handler import get_directory_modules, get_modules from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis +from pyt.vulnerabilities import find_vulnerabilities nosec_lines = set() class EngineTest(BaseTestCase): - def run_empty(self): - return - - def get_lattice_elements(self, cfg_nodes): - """Dummy analysis method""" - return cfg_nodes - - def test_parse(self): - definitions = vulnerabilities.parse( - trigger_word_file=os.path.join( - 
os.getcwd(), - 'pyt', - 'vulnerability_definitions', - 'test_triggers.pyt' - ) - ) - - self.assert_length(definitions.sources, expected_length=1) - self.assert_length(definitions.sinks, expected_length=3) - self.assert_length(definitions.sinks[0][1], expected_length=1) - self.assert_length(definitions.sinks[1][1], expected_length=3) - - def test_parse_section(self): - l = list(trigger_definitions_parser.parse_section(iter(['get']))) - self.assert_length(l, expected_length=1) - self.assertEqual(l[0][0], 'get') - self.assertEqual(l[0][1], list()) - - l = list(trigger_definitions_parser.parse_section(iter(['get', 'get -> a, b, c d s aq a']))) - self.assert_length(l, expected_length=2) - self.assertEqual(l[0][0], 'get') - self.assertEqual(l[1][0], 'get') - self.assertEqual(l[1][1], ['a', 'b', 'c d s aq a']) - self.assert_length(l[1][1], expected_length=3) - - def test_label_contains(self): - cfg_node = Node('label', None, line_number=None, path=None) - trigger_words = [('get', [])] - l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(l, expected_length=0) - - cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) - trigger_words = [('get', []), ('request', [])] - l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(l, expected_length=2) - - trigger_node_1 = l[0] - trigger_node_2 = l[1] - self.assertEqual(trigger_node_1.trigger_word, 'get') - self.assertEqual(trigger_node_1.cfg_node, cfg_node) - self.assertEqual(trigger_node_2.trigger_word, 'request') - self.assertEqual(trigger_node_2.cfg_node, cfg_node) - - cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) - trigger_words = [('get', []), ('get', [])] - l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(l, expected_length=2) - - def test_find_triggers(self): - self.cfg_create_from_file('example/vulnerable_code/XSS.py') - - cfg_list = [self.cfg] - - 
FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) - - XSS1 = cfg_list[1] - trigger_words = [('get', [])] - - l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words, nosec_lines) - self.assert_length(l, expected_length=1) - - - def test_find_sanitiser_nodes(self): - cfg_node = Node(None, None, line_number=None, path=None) - sanitiser_tuple = vulnerabilities.Sanitiser('escape', cfg_node) - sanitiser = 'escape' + def run_analysis(self, path): + path = os.path.normpath(path) - result = list(vulnerabilities.find_sanitiser_nodes(sanitiser, [sanitiser_tuple])) - self.assert_length(result, expected_length=1) - self.assertEqual(result[0], cfg_node) + project_modules = get_modules(os.path.dirname(path)) + local_modules = get_directory_modules(os.path.dirname(path)) + self.cfg_create_from_file(path, project_modules, local_modules) - def test_build_sanitiser_node_dict(self): - self.cfg_create_from_file('example/vulnerable_code/XSS_sanitised.py') cfg_list = [self.cfg] FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) - cfg = cfg_list[1] - - cfg_node = Node(None, None, line_number=None, path=None) - sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node)] - - sanitiser_dict = vulnerabilities.build_sanitiser_node_dict(cfg, sinks_in_file) - self.assert_length(sanitiser_dict, expected_length=1) - self.assertIn('escape', sanitiser_dict.keys()) - - self.assertEqual(sanitiser_dict['escape'][0], cfg.nodes[3]) - - def run_analysis(self, path): - self.cfg_create_from_file(path) - - cfg_list = [self.cfg] - - FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) initialize_constraint_table(cfg_list) analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - return vulnerabilities.find_vulnerabilities( + return find_vulnerabilities( cfg_list, ReachingDefinitionsTaintAnalysis, UImode.NORMAL, @@ -144,416 +44,252 @@ def run_analysis(self, path): nosec_lines ) - def test_find_vulnerabilities_assign_other_var(self): - vulnerabilities 
= self.run_analysis('example/vulnerable_code/XSS_assign_to_other_var.py') - self.assert_length(vulnerabilities, expected_length=1) - - def test_find_vulnerabilities_inter_command_injection(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/inter_command_injection.py') - self.assert_length(vulnerabilities, expected_length=1) + def test_find_vulnerabilities_absolute_from_file_command_injection(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/absolute_from_file_command_injection.py') - def test_find_vulnerabilities_inter_command_injection_2(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/inter_command_injection_2.py') self.assert_length(vulnerabilities, expected_length=1) - def test_XSS_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS.py') + def test_find_vulnerabilities_absolute_from_file_command_injection_2(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/absolute_from_file_command_injection_2.py') self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/XSS.py - > User input at line 6, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') - Reassigned in: - File: example/vulnerable_code/XSS.py - > Line 6: param = ~call_1 - File: example/vulnerable_code/XSS.py - > Line 9: ~call_3 = ret_make_response(~call_4) - File: example/vulnerable_code/XSS.py - > Line 9: resp = ~call_3 - File: example/vulnerable_code/XSS.py - > Line 10: ret_XSS1 = resp - File: example/vulnerable_code/XSS.py - > reaches line 9, trigger word "replace(": - ~call_4 = ret_html.replace('{{ param }}', param) - """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + def test_no_false_positive_absolute_from_file_command_injection_3(self): + 
vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py') + self.assert_length(vulnerabilities, expected_length=0) - def test_command_injection_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/command_injection.py') + def test_blackbox_library_call(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/blackbox_library_call.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/command_injection.py - > User input at line 15, trigger word "form[": - param = request.form['suggestion'] + File: examples/vulnerable_code_across_files/blackbox_library_call.py + > User input at line 12, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('suggestion') Reassigned in: - File: example/vulnerable_code/command_injection.py - > Line 16: command = 'echo ' + param + ' >> ' + 'menu.txt' - File: example/vulnerable_code/command_injection.py - > reaches line 18, trigger word "subprocess.call(": - ~call_1 = ret_subprocess.call(command, shell=True) + File: examples/vulnerable_code_across_files/blackbox_library_call.py + > Line 12: param = ~call_1 + File: examples/vulnerable_code_across_files/blackbox_library_call.py + > Line 15: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') + File: examples/vulnerable_code_across_files/blackbox_library_call.py + > Line 15: command = ~call_2 + File: examples/vulnerable_code_across_files/blackbox_library_call.py + > Line 16: hey = command + File: examples/vulnerable_code_across_files/blackbox_library_call.py + > reaches line 17, trigger word "subprocess.call(": + ~call_3 = ret_subprocess.call(hey, shell=True) + This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') """ 
self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_path_traversal_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/path_traversal.py') + def test_builtin_with_user_defined_inner(self): + vulnerabilities = self.run_analysis('examples/nested_functions_code/builtin_with_user_defined_inner.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/path_traversal.py - > User input at line 15, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > User input at line 16, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/path_traversal.py - > Line 15: image_name = ~call_1 - File: example/vulnerable_code/path_traversal.py - > Line 6: save_2_image_name = image_name - File: example/vulnerable_code/path_traversal.py - > Line 10: save_3_image_name = image_name - File: example/vulnerable_code/path_traversal.py - > Line 10: image_name = save_3_image_name - File: example/vulnerable_code/path_traversal.py - > Line 19: temp_2_other_arg = image_name - File: example/vulnerable_code/path_traversal.py - > Line 6: other_arg = temp_2_other_arg - File: example/vulnerable_code/path_traversal.py - > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg - File: example/vulnerable_code/path_traversal.py - > Line 8: ret_outer = outer_ret_val - File: example/vulnerable_code/path_traversal.py - > Line 6: image_name = save_2_image_name - File: example/vulnerable_code/path_traversal.py - > Line 19: ~call_2 = ret_outer - File: example/vulnerable_code/path_traversal.py - > Line 19: foo = ~call_2 - File: example/vulnerable_code/path_traversal.py - > reaches line 20, trigger word "send_file(": - ~call_4 = ret_send_file(foo) + 
File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 10: save_2_req_param = req_param + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 19: temp_2_inner_arg = req_param + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 10: inner_arg = temp_2_inner_arg + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 11: yes_vuln = inner_arg + 'hey' + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 12: ret_inner = yes_vuln + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 10: req_param = save_2_req_param + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 19: ~call_2 = ret_inner + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 19: ~call_1 = ret_scrypt.encrypt(~call_2) + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > Line 19: foo = ~call_1 + File: examples/nested_functions_code/builtin_with_user_defined_inner.py + > reaches line 20, trigger word "subprocess.call(": + ~call_3 = ret_subprocess.call(foo, shell=True) + This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_ensure_saved_scope(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/ensure_saved_scope.py') + def test_sink_with_result_of_blackbox_nested(self): + vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_result_of_blackbox_nested.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/ensure_saved_scope.py - > User input at line 15, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + File: 
examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > User input at line 12, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/ensure_saved_scope.py - > Line 15: image_name = ~call_1 - File: example/vulnerable_code/ensure_saved_scope.py - > Line 6: save_2_image_name = image_name - File: example/vulnerable_code/ensure_saved_scope.py - > Line 10: save_3_image_name = image_name - File: example/vulnerable_code/ensure_saved_scope.py - > Line 10: image_name = save_3_image_name - File: example/vulnerable_code/ensure_saved_scope.py - > Line 19: temp_2_other_arg = image_name - File: example/vulnerable_code/ensure_saved_scope.py - > Line 6: other_arg = temp_2_other_arg - File: example/vulnerable_code/ensure_saved_scope.py - > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg - File: example/vulnerable_code/ensure_saved_scope.py - > Line 8: ret_outer = outer_ret_val - File: example/vulnerable_code/ensure_saved_scope.py - > Line 6: image_name = save_2_image_name - File: example/vulnerable_code/ensure_saved_scope.py - > Line 19: ~call_2 = ret_outer - File: example/vulnerable_code/ensure_saved_scope.py - > Line 19: foo = ~call_2 - File: example/vulnerable_code/ensure_saved_scope.py - > reaches line 20, trigger word "send_file(": - ~call_4 = ret_send_file(image_name) + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > Line 13: ~call_2 = ret_scrypt.encrypt(req_param) + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > Line 13: ~call_1 = ret_scrypt.encrypt(~call_2) + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > Line 13: result = ~call_1 + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > reaches line 14, trigger word "subprocess.call(": + ~call_3 = ret_subprocess.call(result, shell=True) + This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt(req_param) """ 
- - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - def test_path_traversal_sanitised_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/path_traversal_sanitised.py') - self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/path_traversal_sanitised.py - > User input at line 8, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + OTHER_EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > User input at line 12, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/path_traversal_sanitised.py - > Line 8: image_name = ~call_1 - File: example/vulnerable_code/path_traversal_sanitised.py - > Line 10: ~call_2 = ret_image_name.replace('..', '') - File: example/vulnerable_code/path_traversal_sanitised.py - > Line 10: image_name = ~call_2 - File: example/vulnerable_code/path_traversal_sanitised.py - > Line 12: ~call_4 = ret_os.path.join(~call_5, image_name) - File: example/vulnerable_code/path_traversal_sanitised.py - > Line 12: ret_cat_picture = ~call_3 - File: example/vulnerable_code/path_traversal_sanitised.py - > reaches line 12, trigger word "send_file(": - ~call_3 = ret_send_file(~call_4) - This vulnerability is sanitised by: Label: ~call_2 = ret_image_name.replace('..', '') + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > Line 13: ~call_2 = ret_scrypt.encrypt(req_param) + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > Line 13: ~call_1 = ret_scrypt.encrypt(~call_2) + File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > Line 13: result = ~call_1 + File: 
examples/nested_functions_code/sink_with_result_of_blackbox_nested.py + > reaches line 14, trigger word "subprocess.call(": + ~call_3 = ret_subprocess.call(result, shell=True) + This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) """ + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) + or + self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION)) - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - def test_path_traversal_sanitised_2_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/path_traversal_sanitised_2.py') + def test_sink_with_result_of_user_defined_nested(self): + vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_result_of_user_defined_nested.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/path_traversal_sanitised_2.py - > User input at line 8, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > User input at line 16, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/path_traversal_sanitised_2.py - > Line 8: image_name = ~call_1 - File: example/vulnerable_code/path_traversal_sanitised_2.py - > Line 12: ~call_3 = ret_os.path.join(~call_4, image_name) - File: example/vulnerable_code/path_traversal_sanitised_2.py - > Line 12: ret_cat_picture = ~call_2 - File: example/vulnerable_code/path_traversal_sanitised_2.py - > reaches line 12, trigger word "send_file(": - ~call_2 = ret_send_file(~call_3) - This vulnerability is potentially sanitised by: Label: if '..' 
in image_name: + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 6: save_1_req_param = req_param + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 10: save_2_req_param = req_param + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 17: temp_2_inner_arg = req_param + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 10: inner_arg = temp_2_inner_arg + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 11: inner_ret_val = inner_arg + 'hey' + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 12: ret_inner = inner_ret_val + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 10: req_param = save_2_req_param + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 17: ~call_2 = ret_inner + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 17: temp_1_outer_arg = ~call_2 + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 6: outer_arg = temp_1_outer_arg + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 7: outer_ret_val = outer_arg + 'hey' + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 8: ret_outer = outer_ret_val + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 6: req_param = save_1_req_param + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 17: ~call_1 = ret_outer + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > Line 17: result = ~call_1 + File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py + > reaches line 18, trigger word "subprocess.call(": + ~call_3 = 
ret_subprocess.call(result, shell=True) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_sql_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/sql/sqli.py') + def test_sink_with_blackbox_inner(self): + vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_blackbox_inner.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/sql/sqli.py - > User input at line 26, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > User input at line 12, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/sql/sqli.py - > Line 26: param = ~call_1 - File: example/vulnerable_code/sql/sqli.py - > Line 27: result = ~call_2 - File: example/vulnerable_code/sql/sqli.py - > reaches line 27, trigger word "execute(": - ~call_2 = ret_db.engine.execute(param) + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > Line 14: ~call_3 = ret_scrypt.encypt(req_param) + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > Line 14: ~call_2 = ret_scrypt.encypt(~call_3) + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > reaches line 14, trigger word "subprocess.call(": + ~call_1 = ret_subprocess.call(~call_2, shell=True) + This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encypt(~call_3) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - def test_XSS_form_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_form.py') - self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - 
EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/XSS_form.py - > User input at line 14, trigger word "form[": - data = request.form['my_text'] + OTHER_EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > User input at line 12, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/XSS_form.py - > Line 15: ~call_1 = ret_make_response(~call_2) - File: example/vulnerable_code/XSS_form.py - > Line 15: resp = ~call_1 - File: example/vulnerable_code/XSS_form.py - > Line 17: ret_example2_action = resp - File: example/vulnerable_code/XSS_form.py - > reaches line 15, trigger word "replace(": - ~call_2 = ret_html1.replace('{{ data }}', data) + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > Line 14: ~call_3 = ret_scrypt.encypt(req_param) + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > Line 14: ~call_2 = ret_scrypt.encypt(~call_3) + File: examples/nested_functions_code/sink_with_blackbox_inner.py + > reaches line 14, trigger word "subprocess.call(": + ~call_1 = ret_subprocess.call(~call_2, shell=True) + This vulnerability is unknown due to: Label: ~call_3 = ret_scrypt.encypt(req_param) """ + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) + or + self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION)) - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - def test_XSS_url_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_url.py') + def test_sink_with_user_defined_inner(self): + vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_user_defined_inner.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION 
= """ - File: example/vulnerable_code/XSS_url.py - > User input at line 4, trigger word "Framework function URL parameter": - url + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > User input at line 16, trigger word "form[": + req_param = request.form['suggestion'] Reassigned in: - File: example/vulnerable_code/XSS_url.py - > Line 6: param = url - File: example/vulnerable_code/XSS_url.py - > Line 9: ~call_2 = ret_make_response(~call_3) - File: example/vulnerable_code/XSS_url.py - > Line 9: resp = ~call_2 - File: example/vulnerable_code/XSS_url.py - > Line 10: ret_XSS1 = resp - File: example/vulnerable_code/XSS_url.py - > reaches line 9, trigger word "replace(": - ~call_3 = ret_html.replace('{{ param }}', param) + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 6: save_2_req_param = req_param + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 10: save_3_req_param = req_param + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 18: temp_3_inner_arg = req_param + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 10: inner_arg = temp_3_inner_arg + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 11: inner_ret_val = inner_arg + 'hey' + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 12: ret_inner = inner_ret_val + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 10: req_param = save_3_req_param + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 18: ~call_3 = ret_inner + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 18: temp_2_outer_arg = ~call_3 + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 6: outer_arg = temp_2_outer_arg + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 7: outer_ret_val = outer_arg + 'hey' + File: 
examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 8: ret_outer = outer_ret_val + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 6: req_param = save_2_req_param + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > Line 18: ~call_2 = ret_outer + File: examples/nested_functions_code/sink_with_user_defined_inner.py + > reaches line 18, trigger word "subprocess.call(": + ~call_1 = ret_subprocess.call(~call_2, shell=True) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_XSS_no_vuln_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_no_vuln.py') - self.assert_length(vulnerabilities, expected_length=0) + def test_find_vulnerabilities_import_file_command_injection(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/import_file_command_injection.py') - def test_XSS_reassign_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_reassign.py') self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/XSS_reassign.py - > User input at line 6, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') - Reassigned in: - File: example/vulnerable_code/XSS_reassign.py - > Line 6: param = ~call_1 - File: example/vulnerable_code/XSS_reassign.py - > Line 8: param = param + '' - File: example/vulnerable_code/XSS_reassign.py - > Line 11: ~call_3 = ret_make_response(~call_4) - File: example/vulnerable_code/XSS_reassign.py - > Line 11: resp = ~call_3 - File: example/vulnerable_code/XSS_reassign.py - > Line 12: ret_XSS1 = resp - File: example/vulnerable_code/XSS_reassign.py - > reaches line 11, trigger word "replace(": - ~call_4 = ret_html.replace('{{ param }}', param) - """ - - 
self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_XSS_sanitised_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_sanitised.py') + def test_find_vulnerabilities_import_file_command_injection_2(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/import_file_command_injection_2.py') self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/XSS_sanitised.py - > User input at line 7, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') - Reassigned in: - File: example/vulnerable_code/XSS_sanitised.py - > Line 7: param = ~call_1 - File: example/vulnerable_code/XSS_sanitised.py - > Line 9: ~call_2 = ret_Markup.escape(param) - File: example/vulnerable_code/XSS_sanitised.py - > Line 9: param = ~call_2 - File: example/vulnerable_code/XSS_sanitised.py - > Line 12: ~call_4 = ret_make_response(~call_5) - File: example/vulnerable_code/XSS_sanitised.py - > Line 12: resp = ~call_4 - File: example/vulnerable_code/XSS_sanitised.py - > Line 13: ret_XSS1 = resp - File: example/vulnerable_code/XSS_sanitised.py - > reaches line 12, trigger word "replace(": - ~call_5 = ret_html.replace('{{ param }}', param) - This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) - """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - def test_XSS_variable_assign_no_vuln_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_variable_assign_no_vuln.py') + def test_no_false_positive_import_file_command_injection_3(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py') self.assert_length(vulnerabilities, expected_length=0) - - 
def test_XSS_variable_assign_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_variable_assign.py') - self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/XSS_variable_assign.py - > User input at line 6, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') - Reassigned in: - File: example/vulnerable_code/XSS_variable_assign.py - > Line 6: param = ~call_1 - File: example/vulnerable_code/XSS_variable_assign.py - > Line 8: other_var = param + '' - File: example/vulnerable_code/XSS_variable_assign.py - > Line 11: ~call_3 = ret_make_response(~call_4) - File: example/vulnerable_code/XSS_variable_assign.py - > Line 11: resp = ~call_3 - File: example/vulnerable_code/XSS_variable_assign.py - > Line 12: ret_XSS1 = resp - File: example/vulnerable_code/XSS_variable_assign.py - > reaches line 11, trigger word "replace(": - ~call_4 = ret_html.replace('{{ param }}', other_var) - """ - - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - def test_XSS_variable_multiple_assign_result(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/XSS_variable_multiple_assign.py') - self.assert_length(vulnerabilities, expected_length=1) - vulnerability_description = str(vulnerabilities[0]) - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > User input at line 6, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') - Reassigned in: - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 6: param = ~call_1 - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 8: other_var = param + '' - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 10: not_the_same_var = '' + other_var - File: 
example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 12: another_one = not_the_same_var + '' - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 15: ~call_3 = ret_make_response(~call_4) - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 15: resp = ~call_3 - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > Line 17: ret_XSS1 = resp - File: example/vulnerable_code/XSS_variable_multiple_assign.py - > reaches line 15, trigger word "replace(": - ~call_4 = ret_html.replace('{{ param }}', another_one) - """ - - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - - -class EngineDjangoTest(BaseTestCase): - def run_empty(self): - return - - def run_analysis(self, path): - self.cfg_create_from_file(path) - cfg_list = [self.cfg] - - FrameworkAdaptor(cfg_list, [], [], is_django_view_function) - initialize_constraint_table(cfg_list) - - analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - - trigger_word_file = os.path.join( - 'pyt', - 'vulnerability_definitions', - 'django_trigger_words.pyt' - ) - - return vulnerabilities.find_vulnerabilities( - cfg_list, - ReachingDefinitionsTaintAnalysis, - UImode.NORMAL, - VulnerabilityFiles( - default_blackbox_mapping_file, - trigger_word_file - ), - nosec_lines - ) - - def test_django_view_param(self): - vulnerabilities = self.run_analysis('example/vulnerable_code/django_XSS.py') - self.assert_length(vulnerabilities, expected_length=2) - vulnerability_description = str(vulnerabilities[0]) - - EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: example/vulnerable_code/django_XSS.py - > User input at line 4, trigger word "Framework function URL parameter": - param - Reassigned in: - File: example/vulnerable_code/django_XSS.py - > Line 5: ret_xss1 = ~call_1 - File: example/vulnerable_code/django_XSS.py - > reaches line 5, trigger word "render(": - ~call_1 = ret_render(request, 'templates/xss.html', 
'param'param) - """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) From bed2f77fc96e07ad6006df540cd450afd5b6b039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 18:53:40 +0300 Subject: [PATCH 057/291] added nosec_lines --- tests/vulnerabilities_test.py | 697 ++++++++++++++++++++++++---------- 1 file changed, 497 insertions(+), 200 deletions(-) diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index b30d415b..381860ae 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -1,6 +1,11 @@ import os from .base_test_case import BaseTestCase + +from pyt import ( + trigger_definitions_parser, + vulnerabilities +) from pyt.argument_helpers import ( default_blackbox_mapping_file, default_trigger_word_file, @@ -10,30 +15,123 @@ from pyt.constraint_table import initialize_constraint_table from pyt.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import is_flask_route_function -from pyt.project_handler import get_directory_modules, get_modules +from pyt.framework_helper import ( + is_django_view_function, + is_flask_route_function, + is_function +) +from pyt.node_types import Node from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis -from pyt.vulnerabilities import find_vulnerabilities nosec_lines = set() class EngineTest(BaseTestCase): - def run_analysis(self, path): - path = os.path.normpath(path) + def run_empty(self): + return + + def get_lattice_elements(self, cfg_nodes): + """Dummy analysis method""" + return cfg_nodes + + def test_parse(self): + definitions = vulnerabilities.parse( + trigger_word_file=os.path.join( + os.getcwd(), + 'pyt', + 'vulnerability_definitions', + 'test_triggers.pyt' + ) + ) + + self.assert_length(definitions.sources, expected_length=1) + self.assert_length(definitions.sinks, expected_length=3) + 
self.assert_length(definitions.sinks[0][1], expected_length=1) + self.assert_length(definitions.sinks[1][1], expected_length=3) + + def test_parse_section(self): + l = list(trigger_definitions_parser.parse_section(iter(['get']))) + self.assert_length(l, expected_length=1) + self.assertEqual(l[0][0], 'get') + self.assertEqual(l[0][1], list()) + + l = list(trigger_definitions_parser.parse_section(iter(['get', 'get -> a, b, c d s aq a']))) + self.assert_length(l, expected_length=2) + self.assertEqual(l[0][0], 'get') + self.assertEqual(l[1][0], 'get') + self.assertEqual(l[1][1], ['a', 'b', 'c d s aq a']) + self.assert_length(l[1][1], expected_length=3) + + def test_label_contains(self): + cfg_node = Node('label', None, line_number=None, path=None) + trigger_words = [('get', [])] + l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) + self.assert_length(l, expected_length=0) + + cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) + trigger_words = [('get', []), ('request', [])] + l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) + self.assert_length(l, expected_length=2) + + trigger_node_1 = l[0] + trigger_node_2 = l[1] + self.assertEqual(trigger_node_1.trigger_word, 'get') + self.assertEqual(trigger_node_1.cfg_node, cfg_node) + self.assertEqual(trigger_node_2.trigger_word, 'request') + self.assertEqual(trigger_node_2.cfg_node, cfg_node) + + cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) + trigger_words = [('get', []), ('get', [])] + l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) + self.assert_length(l, expected_length=2) + + def test_find_triggers(self): + self.cfg_create_from_file('examples/vulnerable_code/XSS.py') + + cfg_list = [self.cfg] + + FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) + + XSS1 = cfg_list[1] + trigger_words = [('get', [])] + + l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words, nosec_lines) + self.assert_length(l, 
expected_length=1) - project_modules = get_modules(os.path.dirname(path)) - local_modules = get_directory_modules(os.path.dirname(path)) + def test_find_sanitiser_nodes(self): + cfg_node = Node(None, None, line_number=None, path=None) + sanitiser_tuple = vulnerabilities.Sanitiser('escape', cfg_node) + sanitiser = 'escape' - self.cfg_create_from_file(path, project_modules, local_modules) + result = list(vulnerabilities.find_sanitiser_nodes(sanitiser, [sanitiser_tuple])) + self.assert_length(result, expected_length=1) + self.assertEqual(result[0], cfg_node) + def test_build_sanitiser_node_dict(self): + self.cfg_create_from_file('examples/vulnerable_code/XSS_sanitised.py') cfg_list = [self.cfg] FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) + cfg = cfg_list[1] + + cfg_node = Node(None, None, line_number=None, path=None) + sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node)] + + sanitiser_dict = vulnerabilities.build_sanitiser_node_dict(cfg, sinks_in_file) + self.assert_length(sanitiser_dict, expected_length=1) + self.assertIn('escape', sanitiser_dict.keys()) + + self.assertEqual(sanitiser_dict['escape'][0], cfg.nodes[3]) + + def run_analysis(self, path): + self.cfg_create_from_file(path) + cfg_list = [self.cfg] + + FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) initialize_constraint_table(cfg_list) analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - return find_vulnerabilities( + return vulnerabilities.find_vulnerabilities( cfg_list, ReachingDefinitionsTaintAnalysis, UImode.NORMAL, @@ -44,252 +142,451 @@ def run_analysis(self, path): nosec_lines ) - def test_find_vulnerabilities_absolute_from_file_command_injection(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/absolute_from_file_command_injection.py') + def test_find_vulnerabilities_assign_other_var(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_assign_to_other_var.py') + 
self.assert_length(vulnerabilities, expected_length=1) + def test_find_vulnerabilities_inter_command_injection(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/inter_command_injection.py') self.assert_length(vulnerabilities, expected_length=1) - def test_find_vulnerabilities_absolute_from_file_command_injection_2(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/absolute_from_file_command_injection_2.py') + def test_find_vulnerabilities_inter_command_injection_2(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/inter_command_injection_2.py') self.assert_length(vulnerabilities, expected_length=1) - def test_no_false_positive_absolute_from_file_command_injection_3(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py') - self.assert_length(vulnerabilities, expected_length=0) + def test_XSS_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS.py') + self.assert_length(vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/XSS.py + > User input at line 6, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') + Reassigned in: + File: examples/vulnerable_code/XSS.py + > Line 6: param = ~call_1 + File: examples/vulnerable_code/XSS.py + > Line 9: ~call_3 = ret_make_response(~call_4) + File: examples/vulnerable_code/XSS.py + > Line 9: resp = ~call_3 + File: examples/vulnerable_code/XSS.py + > Line 10: ret_XSS1 = resp + File: examples/vulnerable_code/XSS.py + > reaches line 9, trigger word "replace(": + ~call_4 = ret_html.replace('{{ param }}', param) + """ - def test_blackbox_library_call(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/blackbox_library_call.py') + 
self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_command_injection_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/command_injection.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/vulnerable_code_across_files/blackbox_library_call.py - > User input at line 12, trigger word "request.args.get(": - ~call_1 = ret_request.args.get('suggestion') + File: examples/vulnerable_code/command_injection.py + > User input at line 15, trigger word "form[": + param = request.form['suggestion'] Reassigned in: - File: examples/vulnerable_code_across_files/blackbox_library_call.py - > Line 12: param = ~call_1 - File: examples/vulnerable_code_across_files/blackbox_library_call.py - > Line 15: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') - File: examples/vulnerable_code_across_files/blackbox_library_call.py - > Line 15: command = ~call_2 - File: examples/vulnerable_code_across_files/blackbox_library_call.py - > Line 16: hey = command - File: examples/vulnerable_code_across_files/blackbox_library_call.py - > reaches line 17, trigger word "subprocess.call(": - ~call_3 = ret_subprocess.call(hey, shell=True) - This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') + File: examples/vulnerable_code/command_injection.py + > Line 16: command = 'echo ' + param + ' >> ' + 'menu.txt' + File: examples/vulnerable_code/command_injection.py + > reaches line 18, trigger word "subprocess.call(": + ~call_1 = ret_subprocess.call(command, shell=True) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_builtin_with_user_defined_inner(self): - vulnerabilities = 
self.run_analysis('examples/nested_functions_code/builtin_with_user_defined_inner.py') + def test_path_traversal_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > User input at line 16, trigger word "form[": - req_param = request.form['suggestion'] + File: examples/vulnerable_code/path_traversal.py + > User input at line 15, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('image_name') Reassigned in: - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 10: save_2_req_param = req_param - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 19: temp_2_inner_arg = req_param - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 10: inner_arg = temp_2_inner_arg - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 11: yes_vuln = inner_arg + 'hey' - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 12: ret_inner = yes_vuln - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 10: req_param = save_2_req_param - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 19: ~call_2 = ret_inner - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 19: ~call_1 = ret_scrypt.encrypt(~call_2) - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 19: foo = ~call_1 - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > reaches line 20, trigger word "subprocess.call(": - ~call_3 = ret_subprocess.call(foo, shell=True) - This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) + File: 
examples/vulnerable_code/path_traversal.py + > Line 15: image_name = ~call_1 + File: examples/vulnerable_code/path_traversal.py + > Line 6: save_2_image_name = image_name + File: examples/vulnerable_code/path_traversal.py + > Line 10: save_3_image_name = image_name + File: examples/vulnerable_code/path_traversal.py + > Line 10: image_name = save_3_image_name + File: examples/vulnerable_code/path_traversal.py + > Line 19: temp_2_other_arg = image_name + File: examples/vulnerable_code/path_traversal.py + > Line 6: other_arg = temp_2_other_arg + File: examples/vulnerable_code/path_traversal.py + > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg + File: examples/vulnerable_code/path_traversal.py + > Line 8: ret_outer = outer_ret_val + File: examples/vulnerable_code/path_traversal.py + > Line 6: image_name = save_2_image_name + File: examples/vulnerable_code/path_traversal.py + > Line 19: ~call_2 = ret_outer + File: examples/vulnerable_code/path_traversal.py + > Line 19: foo = ~call_2 + File: examples/vulnerable_code/path_traversal.py + > reaches line 20, trigger word "send_file(": + ~call_4 = ret_send_file(foo) """ + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_sink_with_result_of_blackbox_nested(self): - vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_result_of_blackbox_nested.py') + def test_ensure_saved_scope(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/ensure_saved_scope.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > User input at line 12, trigger word "form[": - req_param = request.form['suggestion'] + File: examples/vulnerable_code/ensure_saved_scope.py + > User input at line 15, trigger word "request.args.get(": + ~call_1 = 
ret_request.args.get('image_name') Reassigned in: - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > Line 13: ~call_2 = ret_scrypt.encrypt(req_param) - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > Line 13: ~call_1 = ret_scrypt.encrypt(~call_2) - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > Line 13: result = ~call_1 - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > reaches line 14, trigger word "subprocess.call(": - ~call_3 = ret_subprocess.call(result, shell=True) - This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt(req_param) + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 15: image_name = ~call_1 + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 6: save_2_image_name = image_name + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 10: save_3_image_name = image_name + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 10: image_name = save_3_image_name + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 19: temp_2_other_arg = image_name + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 6: other_arg = temp_2_other_arg + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 8: ret_outer = outer_ret_val + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 6: image_name = save_2_image_name + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 19: ~call_2 = ret_outer + File: examples/vulnerable_code/ensure_saved_scope.py + > Line 19: foo = ~call_2 + File: examples/vulnerable_code/ensure_saved_scope.py + > reaches line 20, trigger word "send_file(": + ~call_4 = ret_send_file(image_name) """ - OTHER_EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: 
examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > User input at line 12, trigger word "form[": - req_param = request.form['suggestion'] + + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_path_traversal_sanitised_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised.py') + self.assert_length(vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/path_traversal_sanitised.py + > User input at line 8, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('image_name') Reassigned in: - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > Line 13: ~call_2 = ret_scrypt.encrypt(req_param) - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > Line 13: ~call_1 = ret_scrypt.encrypt(~call_2) - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > Line 13: result = ~call_1 - File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > reaches line 14, trigger word "subprocess.call(": - ~call_3 = ret_subprocess.call(result, shell=True) - This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) + File: examples/vulnerable_code/path_traversal_sanitised.py + > Line 8: image_name = ~call_1 + File: examples/vulnerable_code/path_traversal_sanitised.py + > Line 10: ~call_2 = ret_image_name.replace('..', '') + File: examples/vulnerable_code/path_traversal_sanitised.py + > Line 10: image_name = ~call_2 + File: examples/vulnerable_code/path_traversal_sanitised.py + > Line 12: ~call_4 = ret_os.path.join(~call_5, image_name) + File: examples/vulnerable_code/path_traversal_sanitised.py + > Line 12: ret_cat_picture = ~call_3 + File: examples/vulnerable_code/path_traversal_sanitised.py + > reaches line 12, 
trigger word "send_file(": + ~call_3 = ret_send_file(~call_4) + This vulnerability is sanitised by: Label: ~call_2 = ret_image_name.replace('..', '') """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) - or - self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_sink_with_result_of_user_defined_nested(self): - vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_result_of_user_defined_nested.py') + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_path_traversal_sanitised_2_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised_2.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > User input at line 16, trigger word "form[": - req_param = request.form['suggestion'] + File: examples/vulnerable_code/path_traversal_sanitised_2.py + > User input at line 8, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('image_name') Reassigned in: - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 6: save_1_req_param = req_param - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 10: save_2_req_param = req_param - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 17: temp_2_inner_arg = req_param - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 10: inner_arg = temp_2_inner_arg - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 11: inner_ret_val = inner_arg + 'hey' - File: 
examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 12: ret_inner = inner_ret_val - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 10: req_param = save_2_req_param - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 17: ~call_2 = ret_inner - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 17: temp_1_outer_arg = ~call_2 - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 6: outer_arg = temp_1_outer_arg - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 7: outer_ret_val = outer_arg + 'hey' - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 8: ret_outer = outer_ret_val - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 6: req_param = save_1_req_param - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 17: ~call_1 = ret_outer - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 17: result = ~call_1 - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > reaches line 18, trigger word "subprocess.call(": - ~call_3 = ret_subprocess.call(result, shell=True) + File: examples/vulnerable_code/path_traversal_sanitised_2.py + > Line 8: image_name = ~call_1 + File: examples/vulnerable_code/path_traversal_sanitised_2.py + > Line 12: ~call_3 = ret_os.path.join(~call_4, image_name) + File: examples/vulnerable_code/path_traversal_sanitised_2.py + > Line 12: ret_cat_picture = ~call_2 + File: examples/vulnerable_code/path_traversal_sanitised_2.py + > reaches line 12, trigger word "send_file(": + ~call_2 = ret_send_file(~call_3) + This vulnerability is potentially sanitised by: Label: if '..' 
in image_name: """ + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_sink_with_blackbox_inner(self): - vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_blackbox_inner.py') + def test_sql_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/sql/sqli.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > User input at line 12, trigger word "form[": - req_param = request.form['suggestion'] + File: examples/vulnerable_code/sql/sqli.py + > User input at line 26, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > Line 14: ~call_3 = ret_scrypt.encypt(req_param) - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > Line 14: ~call_2 = ret_scrypt.encypt(~call_3) - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > reaches line 14, trigger word "subprocess.call(": - ~call_1 = ret_subprocess.call(~call_2, shell=True) - This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encypt(~call_3) + File: examples/vulnerable_code/sql/sqli.py + > Line 26: param = ~call_1 + File: examples/vulnerable_code/sql/sqli.py + > Line 27: result = ~call_2 + File: examples/vulnerable_code/sql/sqli.py + > reaches line 27, trigger word "execute(": + ~call_2 = ret_db.engine.execute(param) """ - OTHER_EXPECTED_VULNERABILITY_DESCRIPTION = """ - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > User input at line 12, trigger word "form[": - req_param = request.form['suggestion'] + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_XSS_form_result(self): + 
vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_form.py') + self.assert_length(vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/XSS_form.py + > User input at line 14, trigger word "form[": + data = request.form['my_text'] Reassigned in: - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > Line 14: ~call_3 = ret_scrypt.encypt(req_param) - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > Line 14: ~call_2 = ret_scrypt.encypt(~call_3) - File: examples/nested_functions_code/sink_with_blackbox_inner.py - > reaches line 14, trigger word "subprocess.call(": - ~call_1 = ret_subprocess.call(~call_2, shell=True) - This vulnerability is unknown due to: Label: ~call_3 = ret_scrypt.encypt(req_param) + File: examples/vulnerable_code/XSS_form.py + > Line 15: ~call_1 = ret_make_response(~call_2) + File: examples/vulnerable_code/XSS_form.py + > Line 15: resp = ~call_1 + File: examples/vulnerable_code/XSS_form.py + > Line 17: ret_example2_action = resp + File: examples/vulnerable_code/XSS_form.py + > reaches line 15, trigger word "replace(": + ~call_2 = ret_html1.replace('{{ data }}', data) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) - or - self.string_compare_alpha(vulnerability_description, OTHER_EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_sink_with_user_defined_inner(self): - vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_user_defined_inner.py') + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_XSS_url_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_url.py') self.assert_length(vulnerabilities, expected_length=1) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ - 
File: examples/nested_functions_code/sink_with_user_defined_inner.py - > User input at line 16, trigger word "form[": - req_param = request.form['suggestion'] + File: examples/vulnerable_code/XSS_url.py + > User input at line 4, trigger word "Framework function URL parameter": + url Reassigned in: - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 6: save_2_req_param = req_param - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 10: save_3_req_param = req_param - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 18: temp_3_inner_arg = req_param - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 10: inner_arg = temp_3_inner_arg - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 11: inner_ret_val = inner_arg + 'hey' - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 12: ret_inner = inner_ret_val - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 10: req_param = save_3_req_param - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 18: ~call_3 = ret_inner - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 18: temp_2_outer_arg = ~call_3 - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 6: outer_arg = temp_2_outer_arg - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 7: outer_ret_val = outer_arg + 'hey' - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 8: ret_outer = outer_ret_val - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 6: req_param = save_2_req_param - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 18: ~call_2 = ret_outer - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > reaches line 18, trigger word "subprocess.call(": - ~call_1 = 
ret_subprocess.call(~call_2, shell=True) + File: examples/vulnerable_code/XSS_url.py + > Line 6: param = url + File: examples/vulnerable_code/XSS_url.py + > Line 9: ~call_2 = ret_make_response(~call_3) + File: examples/vulnerable_code/XSS_url.py + > Line 9: resp = ~call_2 + File: examples/vulnerable_code/XSS_url.py + > Line 10: ret_XSS1 = resp + File: examples/vulnerable_code/XSS_url.py + > reaches line 9, trigger word "replace(": + ~call_3 = ret_html.replace('{{ param }}', param) """ + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_find_vulnerabilities_import_file_command_injection(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/import_file_command_injection.py') + def test_XSS_no_vuln_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_no_vuln.py') + self.assert_length(vulnerabilities, expected_length=0) + def test_XSS_reassign_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_reassign.py') self.assert_length(vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/XSS_reassign.py + > User input at line 6, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') + Reassigned in: + File: examples/vulnerable_code/XSS_reassign.py + > Line 6: param = ~call_1 + File: examples/vulnerable_code/XSS_reassign.py + > Line 8: param = param + '' + File: examples/vulnerable_code/XSS_reassign.py + > Line 11: ~call_3 = ret_make_response(~call_4) + File: examples/vulnerable_code/XSS_reassign.py + > Line 11: resp = ~call_3 + File: examples/vulnerable_code/XSS_reassign.py + > Line 12: ret_XSS1 = resp + File: examples/vulnerable_code/XSS_reassign.py + > reaches line 11, trigger word "replace(": + ~call_4 = ret_html.replace('{{ param }}', param) + """ + + 
self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) - def test_find_vulnerabilities_import_file_command_injection_2(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/import_file_command_injection_2.py') + def test_XSS_sanitised_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_sanitised.py') self.assert_length(vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/XSS_sanitised.py + > User input at line 7, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') + Reassigned in: + File: examples/vulnerable_code/XSS_sanitised.py + > Line 7: param = ~call_1 + File: examples/vulnerable_code/XSS_sanitised.py + > Line 9: ~call_2 = ret_Markup.escape(param) + File: examples/vulnerable_code/XSS_sanitised.py + > Line 9: param = ~call_2 + File: examples/vulnerable_code/XSS_sanitised.py + > Line 12: ~call_4 = ret_make_response(~call_5) + File: examples/vulnerable_code/XSS_sanitised.py + > Line 12: resp = ~call_4 + File: examples/vulnerable_code/XSS_sanitised.py + > Line 13: ret_XSS1 = resp + File: examples/vulnerable_code/XSS_sanitised.py + > reaches line 12, trigger word "replace(": + ~call_5 = ret_html.replace('{{ param }}', param) + This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) + """ + + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_XSS_variable_assign_no_vuln_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_assign_no_vuln.py') + self.assert_length(vulnerabilities, expected_length=0) + + def test_XSS_variable_assign_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_assign.py') + self.assert_length(vulnerabilities, expected_length=1) 
+ vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/XSS_variable_assign.py + > User input at line 6, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') + Reassigned in: + File: examples/vulnerable_code/XSS_variable_assign.py + > Line 6: param = ~call_1 + File: examples/vulnerable_code/XSS_variable_assign.py + > Line 8: other_var = param + '' + File: examples/vulnerable_code/XSS_variable_assign.py + > Line 11: ~call_3 = ret_make_response(~call_4) + File: examples/vulnerable_code/XSS_variable_assign.py + > Line 11: resp = ~call_3 + File: examples/vulnerable_code/XSS_variable_assign.py + > Line 12: ret_XSS1 = resp + File: examples/vulnerable_code/XSS_variable_assign.py + > reaches line 11, trigger word "replace(": + ~call_4 = ret_html.replace('{{ param }}', other_var) + """ + + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_XSS_variable_multiple_assign_result(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_multiple_assign.py') + self.assert_length(vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > User input at line 6, trigger word "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') + Reassigned in: + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > Line 6: param = ~call_1 + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > Line 8: other_var = param + '' + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > Line 10: not_the_same_var = '' + other_var + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > Line 12: another_one = not_the_same_var + '' + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > 
Line 15: ~call_3 = ret_make_response(~call_4) + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > Line 15: resp = ~call_3 + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > Line 17: ret_XSS1 = resp + File: examples/vulnerable_code/XSS_variable_multiple_assign.py + > reaches line 15, trigger word "replace(": + ~call_4 = ret_html.replace('{{ param }}', another_one) + """ + + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + +class EngineDjangoTest(BaseTestCase): + def run_empty(self): + return + + def run_analysis(self, path): + self.cfg_create_from_file(path) + cfg_list = [self.cfg] + + FrameworkAdaptor(cfg_list, [], [], is_django_view_function) + initialize_constraint_table(cfg_list) + + analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + + trigger_word_file = os.path.join( + 'pyt', + 'vulnerability_definitions', + 'django_trigger_words.pyt' + ) + + return vulnerabilities.find_vulnerabilities( + cfg_list, + ReachingDefinitionsTaintAnalysis, + UImode.NORMAL, + VulnerabilityFiles( + default_blackbox_mapping_file, + trigger_word_file + ), + nosec_lines + ) + + def test_django_view_param(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/django_XSS.py') + self.assert_length(vulnerabilities, expected_length=2) + vulnerability_description = str(vulnerabilities[0]) + + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/django_XSS.py + > User input at line 4, trigger word "Framework function URL parameter": + param + Reassigned in: + File: examples/vulnerable_code/django_XSS.py + > Line 5: ret_xss1 = ~call_1 + File: examples/vulnerable_code/django_XSS.py + > reaches line 5, trigger word "render(": + ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) + """ + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + +class EngineEveryTest(BaseTestCase): + 
def run_empty(self): + return + + def run_analysis(self, path): + self.cfg_create_from_file(path) + cfg_list = [self.cfg] + + FrameworkAdaptor(cfg_list, [], [], is_function) + initialize_constraint_table(cfg_list) + + analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + + trigger_word_file = os.path.join( + 'pyt', + 'vulnerability_definitions', + 'all_trigger_words.pyt' + ) + + return vulnerabilities.find_vulnerabilities( + cfg_list, + ReachingDefinitionsTaintAnalysis, + UImode.NORMAL, + VulnerabilityFiles( + default_blackbox_mapping_file, + trigger_word_file + ), + nosec_lines + ) - def test_no_false_positive_import_file_command_injection_3(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py') + def test_self_is_not_tainted(self): + vulnerabilities = self.run_analysis('examples/example_inputs/def_with_self_as_first_arg.py') self.assert_length(vulnerabilities, expected_length=0) From 69d019327eca9f916aefbaad2cc2c433309c36bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 18:54:51 +0300 Subject: [PATCH 058/291] added empty nosec_lines for tests --- tests/vulnerabilities_across_files_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index 7492aee2..b30d415b 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -15,7 +15,7 @@ from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from pyt.vulnerabilities import find_vulnerabilities - +nosec_lines = set() class EngineTest(BaseTestCase): def run_analysis(self, path): path = os.path.normpath(path) @@ -40,7 +40,8 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, default_trigger_word_file - ) + ), + nosec_lines ) def 
test_find_vulnerabilities_absolute_from_file_command_injection(self): From 3cb5186942473d7a8fceb851e09cb36133355c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 18:57:10 +0300 Subject: [PATCH 059/291] Added ignore-nosec argument --- tests/command_line_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/command_line_test.py b/tests/command_line_test.py index a4663225..25d06a73 100644 --- a/tests/command_line_test.py +++ b/tests/command_line_test.py @@ -28,6 +28,7 @@ def test_no_args(self): [-m BLACKBOX_MAPPING_FILE] [-py2] [-l LOG_LEVEL] [-a ADAPTOR] [-db] [-dl DRAW_LATTICE [DRAW_LATTICE ...]] [-j] [-li | -re | -rt] [-ppm] [-b BASELINE] + [--ignore-nosec] {save,github_search} ...\n""" + \ "python -m pyt: error: one of the arguments " + \ "-f/--filepath -gr/--git-repos is required\n" From ec6d23acbe87bc0cc0f97fa91bb5bdca4f58a4cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 20:51:55 +0300 Subject: [PATCH 060/291] Update vulnerabilities_test.py --- tests/vulnerabilities_test.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 381860ae..b4698e91 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -23,7 +23,6 @@ from pyt.node_types import Node from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis -nosec_lines = set() class EngineTest(BaseTestCase): def run_empty(self): return @@ -93,7 +92,7 @@ def test_find_triggers(self): XSS1 = cfg_list[1] trigger_words = [('get', [])] - l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words, nosec_lines) + l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words) self.assert_length(l, expected_length=1) def test_find_sanitiser_nodes(self): @@ -138,8 +137,7 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, default_trigger_word_file - ), - 
nosec_lines + ) ) def test_find_vulnerabilities_assign_other_var(self): @@ -534,8 +532,7 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, trigger_word_file - ), - nosec_lines + ) ) def test_django_view_param(self): @@ -583,8 +580,7 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, trigger_word_file - ), - nosec_lines + ) ) def test_self_is_not_tainted(self): From 6f099120464ed17dad1546ddd32556d68d842fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 20:52:46 +0300 Subject: [PATCH 061/291] unnecessary codes removed --- tests/vulnerabilities_across_files_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index b30d415b..25dd1267 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -15,7 +15,6 @@ from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from pyt.vulnerabilities import find_vulnerabilities -nosec_lines = set() class EngineTest(BaseTestCase): def run_analysis(self, path): path = os.path.normpath(path) @@ -40,8 +39,7 @@ def run_analysis(self, path): VulnerabilityFiles( default_blackbox_mapping_file, default_trigger_word_file - ), - nosec_lines + ) ) def test_find_vulnerabilities_absolute_from_file_command_injection(self): From 9c4dea6e511f76abca8feb396ecf1e6f2e1565fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 26 Apr 2018 20:53:40 +0300 Subject: [PATCH 062/291] added default nosec_lines --- pyt/vulnerabilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index eeb9404a..ecf76e78 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -172,7 +172,7 @@ def append_node_if_reassigned( def find_triggers( nodes, trigger_words, - nosec_lines + 
nosec_lines = set() ): """Find triggers from the trigger_word_list in the nodes. @@ -470,7 +470,7 @@ def find_vulnerabilities_in_cfg( ui_mode, blackbox_mapping, vulnerabilities_list, - nosec_lines + nosec_lines = set() ): """Find vulnerabilities in a cfg. From 882b51939a3c5350458906ee886b591311ef8c2d Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 26 Apr 2018 18:23:46 -0700 Subject: [PATCH 063/291] Refactored out string_compare_alpha and string_compare_alnum out of base_test_case --- tests/analysis/analysis_base_test_case.py | 6 ++++++ tests/base_test_case.py | 14 -------------- .../vulnerabilities_across_files_test.py | 4 ++-- .../vulnerabilities_base_test_case.py | 10 ++++++++++ tests/vulnerabilities/vulnerabilities_test.py | 8 ++++---- 5 files changed, 22 insertions(+), 20 deletions(-) create mode 100644 tests/vulnerabilities/vulnerabilities_base_test_case.py diff --git a/tests/analysis/analysis_base_test_case.py b/tests/analysis/analysis_base_test_case.py index 76037c7e..e92efb19 100644 --- a/tests/analysis/analysis_base_test_case.py +++ b/tests/analysis/analysis_base_test_case.py @@ -68,3 +68,9 @@ def run_analysis(self, path): self.analysis = FixedPointAnalysis(self.cfg) self.analysis.fixpoint_runner() return Lattice(self.cfg.nodes) + + def string_compare_alnum(self, output, expected_string): + return ( + [char for char in output if char.isalnum()] == + [char for char in expected_string if char.isalnum()] + ) diff --git a/tests/base_test_case.py b/tests/base_test_case.py index b2b70dca..a8d31f94 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -75,17 +75,3 @@ def cfg_create_from_file( local_modules, filename ) - - def string_compare_alpha(self, output, expected_string): - # Only used in vulnerability tests - return ( - [char for char in output if char.isalpha()] == - [char for char in expected_string if char.isalpha()] - ) - - def string_compare_alnum(self, output, expected_string): - # Only used in reaching_definitions_taint_test - 
return ( - [char for char in output if char.isalnum()] == - [char for char in expected_string if char.isalnum()] - ) diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index 026c38ef..b33982ab 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -1,6 +1,6 @@ import os -from ..base_test_case import BaseTestCase +from .vulnerabilities_base_test_case import VulnerabilitiesBaseTestCase from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse @@ -22,7 +22,7 @@ ) -class EngineTest(BaseTestCase): +class EngineTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): path = os.path.normpath(path) diff --git a/tests/vulnerabilities/vulnerabilities_base_test_case.py b/tests/vulnerabilities/vulnerabilities_base_test_case.py new file mode 100644 index 00000000..dcf088a4 --- /dev/null +++ b/tests/vulnerabilities/vulnerabilities_base_test_case.py @@ -0,0 +1,10 @@ +from ..base_test_case import BaseTestCase + + +class VulnerabilitiesBaseTestCase(BaseTestCase): + + def string_compare_alpha(self, output, expected_string): + return ( + [char for char in output if char.isalpha()] == + [char for char in expected_string if char.isalpha()] + ) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 7313a766..a1d48fd1 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -1,6 +1,6 @@ import os -from ..base_test_case import BaseTestCase +from .vulnerabilities_base_test_case import VulnerabilitiesBaseTestCase from pyt.analysis.constraint_table import initialize_constraint_table from pyt.analysis.fixed_point import analyse @@ -23,7 +23,7 @@ ) -class EngineTest(BaseTestCase): +class EngineTest(VulnerabilitiesBaseTestCase): def test_parse(self): 
definitions = trigger_definitions_parser.parse( trigger_word_file=os.path.join( @@ -496,7 +496,7 @@ def test_XSS_variable_multiple_assign_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) -class EngineDjangoTest(BaseTestCase): +class EngineDjangoTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): self.cfg_create_from_file(path) cfg_list = [self.cfg] @@ -538,7 +538,7 @@ def test_django_view_param(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) -class EngineEveryTest(BaseTestCase): +class EngineEveryTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): self.cfg_create_from_file(path) cfg_list = [self.cfg] From 7c81a160db72b8810ccc80cedf5bfc4bf03c4fe2 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 26 Apr 2018 18:37:21 -0700 Subject: [PATCH 064/291] Refactored out assertInCfg, assertLineNumber and cfg_list_to_dict out of base_test_case into CFGBaseTestCase --- tests/base_test_case.py | 48 ------------------------------- tests/cfg/cfg_base_test_case.py | 50 +++++++++++++++++++++++++++++++++ tests/cfg/cfg_test.py | 26 ++++++++--------- 3 files changed, 63 insertions(+), 61 deletions(-) create mode 100644 tests/cfg/cfg_base_test_case.py diff --git a/tests/base_test_case.py b/tests/base_test_case.py index a8d31f94..21b7c695 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -9,54 +9,6 @@ class BaseTestCase(unittest.TestCase): """A base class that has helper methods for testing PyT.""" - def assertInCfg(self, connections): - """Asserts that all connections in the connections list exists in the cfg, - as well as that all connections not in the list do not exist. - - Args: - connections(list[tuple]): the node at index 0 of the tuple has - to be in the new_constraint set of the node - at index 1 of the tuple. 
- """ - for connection in connections: - self.assertIn( - self.cfg.nodes[connection[0]], - self.cfg.nodes[connection[1]].outgoing, - str(connection) + " expected to be connected" - ) - self.assertIn( - self.cfg.nodes[connection[1]], - self.cfg.nodes[connection[0]].ingoing, - str(connection) + " expected to be connected" - ) - - nodes = len(self.cfg.nodes) - - for element in range(nodes): - for sets in range(nodes): - if not (element, sets) in connections: - self.assertNotIn( - self.cfg.nodes[element], - self.cfg.nodes[sets].outgoing, - "(%s <- %s)" % (element, sets) + " expected to be disconnected" - ) - self.assertNotIn( - self.cfg.nodes[sets], - self.cfg.nodes[element].ingoing, - "(%s <- %s)" % (sets, element) + " expected to be disconnected" - ) - - def assertLineNumber(self, node, line_number): - """Only used in cfg_test.""" - self.assertEqual(node.line_number, line_number) - - def cfg_list_to_dict(self, list): - """This method converts the CFG list to a dict, making it easier to find nodes to test. - This method assumes that no nodes in the code have the same label. - Only used in cfg_test. - """ - return {x.label: x for x in list} - def assert_length(self, _list, *, expected_length): actual_length = len(_list) self.assertEqual(expected_length, actual_length) diff --git a/tests/cfg/cfg_base_test_case.py b/tests/cfg/cfg_base_test_case.py new file mode 100644 index 00000000..5d6d0f8f --- /dev/null +++ b/tests/cfg/cfg_base_test_case.py @@ -0,0 +1,50 @@ +from ..base_test_case import BaseTestCase + + +class CFGBaseTestCase(BaseTestCase): + + def assertInCfg(self, connections): + """Asserts that all connections in the connections list exists in the cfg, + as well as that all connections not in the list do not exist. + + Args: + connections(list[tuple]): the node at index 0 of the tuple has + to be in the new_constraint set of the node + at index 1 of the tuple. 
+ """ + for connection in connections: + self.assertIn( + self.cfg.nodes[connection[0]], + self.cfg.nodes[connection[1]].outgoing, + str(connection) + " expected to be connected" + ) + self.assertIn( + self.cfg.nodes[connection[1]], + self.cfg.nodes[connection[0]].ingoing, + str(connection) + " expected to be connected" + ) + + nodes = len(self.cfg.nodes) + + for element in range(nodes): + for sets in range(nodes): + if not (element, sets) in connections: + self.assertNotIn( + self.cfg.nodes[element], + self.cfg.nodes[sets].outgoing, + "(%s <- %s)" % (element, sets) + " expected to be disconnected" + ) + self.assertNotIn( + self.cfg.nodes[sets], + self.cfg.nodes[element].ingoing, + "(%s <- %s)" % (sets, element) + " expected to be disconnected" + ) + + def assertLineNumber(self, node, line_number): + self.assertEqual(node.line_number, line_number) + + def cfg_list_to_dict(self, list): + """This method converts the CFG list to a dict, making it easier to find nodes to test. + This method assumes that no nodes in the code have the same label. 
+ """ + return {x.label: x for x in list} diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index e9a1f489..ceb9c5c5 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -1,4 +1,4 @@ -from ..base_test_case import BaseTestCase +from .cfg_base_test_case import CFGBaseTestCase from pyt.core.node_types import ( EntryOrExitNode, @@ -6,7 +6,7 @@ ) -class CFGGeneralTest(BaseTestCase): +class CFGGeneralTest(CFGBaseTestCase): def test_repr_cfg(self): self.cfg_create_from_file('examples/example_inputs/for_complete.py') @@ -60,7 +60,7 @@ def test_str_ignored(self): self.assertEqual(expected_label, actual_label) -class CFGForTest(BaseTestCase): +class CFGForTest(CFGBaseTestCase): def test_for_complete(self): self.cfg_create_from_file('examples/example_inputs/for_complete.py') @@ -177,7 +177,7 @@ def test_for_func_iterator(self): ]) -class CFGTryTest(BaseTestCase): +class CFGTryTest(CFGBaseTestCase): def connected(self, node, successor): return (successor, node) @@ -285,7 +285,7 @@ def test_final(self): self.connected(print_final, _exit)]) -class CFGIfTest(BaseTestCase): +class CFGIfTest(CFGBaseTestCase): def test_if_complete(self): self.cfg_create_from_file('examples/example_inputs/if_complete.py') @@ -487,7 +487,7 @@ def test_if_not(self): ]) -class CFGWhileTest(BaseTestCase): +class CFGWhileTest(CFGBaseTestCase): def test_while_complete(self): self.cfg_create_from_file('examples/example_inputs/while_complete.py') @@ -558,7 +558,7 @@ def test_while_line_numbers(self): self.assertLineNumber(next_stmt, 7) -class CFGAssignmentMultiTest(BaseTestCase): +class CFGAssignmentMultiTest(CFGBaseTestCase): def test_assignment_multi_target(self): self.cfg_create_from_file('examples/example_inputs/assignment_two_targets.py') @@ -669,7 +669,7 @@ def test_assignment_tuple_value(self): self.assertEqual(self.cfg.nodes[node].label, 'a = (x, y)') -class CFGComprehensionTest(BaseTestCase): +class CFGComprehensionTest(CFGBaseTestCase): def test_nodes(self): 
self.cfg_create_from_file('examples/example_inputs/comprehensions.py') @@ -717,7 +717,7 @@ def test_dict_comprehension_multi(self): self.assertEqual(listcomp.label, 'dd = {x + y : y for x in [1, 2, 3] for y in [4, 5, 6]}') -class CFGFunctionNodeTest(BaseTestCase): +class CFGFunctionNodeTest(CFGBaseTestCase): def connected(self, node, successor): return (successor, node) @@ -1167,7 +1167,7 @@ def test_call_on_call(self): self.cfg_create_from_file(path) -class CFGCallWithAttributeTest(BaseTestCase): +class CFGCallWithAttributeTest(CFGBaseTestCase): def setUp(self): self.cfg_create_from_file('examples/example_inputs/call_with_attribute.py') @@ -1187,7 +1187,7 @@ def test_call_with_attribute_line_numbers(self): self.assertLineNumber(call, 5) -class CFGBreak(BaseTestCase): +class CFGBreak(CFGBaseTestCase): """Break in while and for and other places""" def test_break(self): self.cfg_create_from_file('examples/example_inputs/while_break.py') @@ -1215,7 +1215,7 @@ def test_break(self): (_exit, print_next)]) -class CFGNameConstant(BaseTestCase): +class CFGNameConstant(CFGBaseTestCase): def setUp(self): self.cfg_create_from_file('examples/example_inputs/name_constant.py') @@ -1233,7 +1233,7 @@ def test_name_constant_if(self): self.assertEqual(expected_label, actual_label) -class CFGName(BaseTestCase): +class CFGName(CFGBaseTestCase): """Test is Name nodes are properly handled in different contexts""" def test_name_if(self): From 3c3f6f5ce09eecf8656866c1ea62ecf01bcfb5df Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 26 Apr 2018 19:18:23 -0700 Subject: [PATCH 065/291] Added a test for usage.py --- .coveragerc | 1 - pyt/__main__.py | 4 +- pyt/usage.py | 25 ++++++------- tests/usage_test.py | 91 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 17 deletions(-) create mode 100644 tests/usage_test.py diff --git a/.coveragerc b/.coveragerc index bfd2571f..19ad217b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -16,6 +16,5 @@ source = ./tests 
omit = pyt/__main__.py - pyt/usage.py pyt/formatters/json.py pyt/formatters/text.py diff --git a/pyt/__main__.py b/pyt/__main__.py index c1bc68ef..71571e4b 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -40,8 +40,8 @@ def main(command_line_args=sys.argv[1:]): ui_mode = UImode.TRIM path = os.path.normpath(args.filepath) - if args.root_directory: - directory = os.path.normpath(args.root_directory) + if args.project_root: + directory = os.path.normpath(args.project_root) else: directory = os.path.dirname(path) project_modules = get_modules(directory) diff --git a/pyt/usage.py b/pyt/usage.py index d2cf2d98..617ca3d3 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -33,13 +33,6 @@ def _add_required_group(parser): help='Path to the file that should be analysed.', type=str ) - required_group.add_argument( - '-r', '--root-directory', - help='Add project root, this is important when the entry ' - 'file is not at the root of the project.', - type=str, - metavar='DIR_TO_ANALYZE' - ) def _add_optional_group(parser): @@ -51,6 +44,12 @@ def _add_optional_group(parser): 'Flask(Default), Django, Every or Pylons', type=str ) + optional_group.add_argument( + '-pr', '--project-root', + help='Add project root, only important when the entry ' + 'file is not at the root of the project.', + type=str + ) optional_group.add_argument( '-b', '--baseline', help='Path of a baseline report to compare against ' @@ -80,26 +79,24 @@ def _add_optional_group(parser): def _add_print_group(parser): - print_group = parser.add_mutually_exclusive_group() + print_group = parser.add_argument_group('print arguments') print_group.add_argument( '-trim', '--trim-reassigned-in', - help='Trims the reassigned list to the vulnerability chain.', + help='Trims the reassigned list to just the vulnerability chain.', action='http://github.com/store_true', default=True ) print_group.add_argument( '-i', '--interactive', - help='Will ask you about each vulnerability chain and blackbox nodes.', + help='Will 
ask you about each blackbox function call in vulnerability chains.', action='http://github.com/store_true', default=False ) def _check_required_and_mutually_exclusive_args(parser, args): - if args.filepath is None and args.root_directory is None: - parser.error('one of the arguments -f/--filepath -r/--root-directory is required') - if args.filepath and args.root_directory: - parser.error('argument -f/--filepath: not allowed with argument -r/--root-directory') + if args.filepath is None: + parser.error('The -f/--filepath argument is required') if args.trim_reassigned_in and args.interactive: parser.error('argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in') diff --git a/tests/usage_test.py b/tests/usage_test.py new file mode 100644 index 00000000..8a495541 --- /dev/null +++ b/tests/usage_test.py @@ -0,0 +1,91 @@ +"""This just tests usage.py""" +import sys +from contextlib import contextmanager +from io import StringIO + +from .base_test_case import BaseTestCase +from pyt.usage import parse_args + + +@contextmanager +def capture_sys_output(): + capture_out, capture_err = StringIO(), StringIO() + current_out, current_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = capture_out, capture_err + yield capture_out, capture_err + finally: + sys.stdout, sys.stderr = current_out, current_err + + +class UsageTest(BaseTestCase): + def test_no_args(self): + with self.assertRaises(SystemExit): + with capture_sys_output() as (stdout, _): + parse_args([]) + + self.maxDiff = None + + EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] + [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] + [-t TRIGGER_WORD_FILE] [-trim] [-i] + +required arguments: + -f FILEPATH, --filepath FILEPATH + Path to the file that should be analysed. 
+ +optional arguments: + -a ADAPTOR, --adaptor ADAPTOR + Choose a web framework adaptor: Flask(Default), + Django, Every or Pylons + -pr PROJECT_ROOT, --project-root PROJECT_ROOT + Add project root, only important when the entry file + is not at the root of the project. + -b BASELINE_JSON_FILE, --baseline BASELINE_JSON_FILE + Path of a baseline report to compare against (only + JSON-formatted files are accepted) + -j, --json Prints JSON instead of report. + -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE + Input blackbox mapping file. + -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE + Input file with a list of sources and sinks + +print arguments: + -trim, --trim-reassigned-in + Trims the reassigned list to just the vulnerability + chain. + -i, --interactive Will ask you about each blackbox function call in + vulnerability chains.\n""" + + self.assertEqual(stdout.getvalue(), EXPECTED) + + def test_valid_args_but_no_filepath(self): + with self.assertRaises(SystemExit): + with capture_sys_output() as (_, stderr): + parse_args(['-j']) + + EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] + [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] + [-t TRIGGER_WORD_FILE] [-trim] [-i] +python -m pyt: error: The -f/--filepath argument is required\n""" + + self.assertEqual(stderr.getvalue(), EXPECTED) + + def test_using_both_mutually_exclusive_args(self): + with self.assertRaises(SystemExit): + with capture_sys_output() as (_, stderr): + parse_args(['-f', 'foo.py', '-trim', '--interactive']) + + EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] + [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] + [-t TRIGGER_WORD_FILE] [-trim] [-i] +python -m pyt: error: argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in\n""" + + self.assertEqual(stderr.getvalue(), EXPECTED) + + def test_normal_usage(self): + with capture_sys_output() as (stdout, 
stderr): + parse_args(['-f', 'foo.py']) + + self.assertEqual(stdout.getvalue(), '') + self.assertEqual(stderr.getvalue(), '') From 609acd6ca7e3800f21e82d5e3736e8112a91b49c Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 26 Apr 2018 20:41:53 -0700 Subject: [PATCH 066/291] Added a test for __main__ and the outfile option --- .coveragerc | 1 - pyt/__main__.py | 6 ++--- pyt/usage.py | 9 +++++++ tests/main_test.py | 60 +++++++++++++++++++++++++++++++++++++++++++++ tests/usage_test.py | 9 ++++--- 5 files changed, 77 insertions(+), 8 deletions(-) create mode 100644 tests/main_test.py diff --git a/.coveragerc b/.coveragerc index 19ad217b..c7e7a385 100644 --- a/.coveragerc +++ b/.coveragerc @@ -15,6 +15,5 @@ source = ./pyt ./tests omit = - pyt/__main__.py pyt/formatters/json.py pyt/formatters/text.py diff --git a/pyt/__main__.py b/pyt/__main__.py index 71571e4b..9007e412 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -55,7 +55,7 @@ def main(command_line_args=sys.argv[1:]): local_modules, path ) - cfg_list = list(cfg) + cfg_list = [cfg] framework_route_criteria = is_flask_route_function if args.adaptor: if args.adaptor.lower().startswith('e'): @@ -87,9 +87,9 @@ def main(command_line_args=sys.argv[1:]): ) if args.json: - json.report(vulnerabilities, sys.stdout) + json.report(vulnerabilities, args.output_file) else: - text.report(vulnerabilities, sys.stdout) + text.report(vulnerabilities, args.output_file) if __name__ == '__main__': diff --git a/pyt/usage.py b/pyt/usage.py index 617ca3d3..d633cf79 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -1,5 +1,6 @@ import argparse import os +import sys from datetime import datetime @@ -76,6 +77,14 @@ def _add_optional_group(parser): type=str, default=default_trigger_word_file ) + optional_group.add_argument( + '-o', '--output', + help='write report to filename', + dest='output_file', + action='http://github.com/store', + type=argparse.FileType('w'), + default=sys.stdout, + ) def _add_print_group(parser): diff 
--git a/tests/main_test.py b/tests/main_test.py new file mode 100644 index 00000000..eea6ff47 --- /dev/null +++ b/tests/main_test.py @@ -0,0 +1,60 @@ +import mock + +from .base_test_case import BaseTestCase +from pyt.__main__ import main + + +class MainTest(BaseTestCase): + @mock.patch('pyt.__main__.parse_args') + @mock.patch('pyt.__main__.find_vulnerabilities') + @mock.patch('pyt.__main__.text') + def test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args): + mock_find_vulnerabilities.return_value = 'stuff' + example_file = 'examples/vulnerable_code/inter_command_injection.py' + output_file = 'mocked_outfile' + + mock_parse_args.return_value = mock.Mock( + autospec=True, + filepath=example_file, + project_root=None, + baseline=None, + json=None, + output_file=output_file + ) + main([ + 'parse_args is mocked' + ]) + assert mock_text.report.call_count == 1 + # This with: makes no sense + with self.assertRaises(AssertionError): + assert mock_text.report.assert_called_with( + mock_find_vulnerabilities.return_value, + mock_parse_args.return_value.output_file + ) + + @mock.patch('pyt.__main__.parse_args') + @mock.patch('pyt.__main__.find_vulnerabilities') + @mock.patch('pyt.__main__.json') + def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args): + mock_find_vulnerabilities.return_value = 'stuff' + example_file = 'examples/vulnerable_code/inter_command_injection.py' + output_file = 'mocked_outfile' + + mock_parse_args.return_value = mock.Mock( + autospec=True, + filepath=example_file, + project_root=None, + baseline=None, + json=True, + output_file=output_file + ) + main([ + 'parse_args is mocked' + ]) + assert mock_json.report.call_count == 1 + # This with: makes no sense + with self.assertRaises(AssertionError): + assert mock_json.report.assert_called_with( + mock_find_vulnerabilities.return_value, + mock_parse_args.return_value.output_file + ) diff --git a/tests/usage_test.py b/tests/usage_test.py index 
8a495541..2b8673c9 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -1,4 +1,3 @@ -"""This just tests usage.py""" import sys from contextlib import contextmanager from io import StringIO @@ -28,7 +27,7 @@ def test_no_args(self): EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-trim] [-i] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] required arguments: -f FILEPATH, --filepath FILEPATH @@ -49,6 +48,8 @@ def test_no_args(self): Input blackbox mapping file. -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE Input file with a list of sources and sinks + -o OUTPUT_FILE, --output OUTPUT_FILE + write report to filename print arguments: -trim, --trim-reassigned-in @@ -66,7 +67,7 @@ def test_valid_args_but_no_filepath(self): EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-trim] [-i] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] python -m pyt: error: The -f/--filepath argument is required\n""" self.assertEqual(stderr.getvalue(), EXPECTED) @@ -78,7 +79,7 @@ def test_using_both_mutually_exclusive_args(self): EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-trim] [-i] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] python -m pyt: error: argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in\n""" self.assertEqual(stderr.getvalue(), EXPECTED) From 18609661f44a7a49fd43510f29f41098515578b8 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 26 Apr 2018 20:53:16 -0700 Subject: [PATCH 067/291] Per-directory READMEs, coming soon --- pyt/analysis/README.rst | 1 + pyt/cfg/README.rst | 1 + pyt/core/README.rst | 1 + pyt/helper_visitors/README.rst | 1 + 
pyt/vulnerabilities/README.rst | 1 + pyt/vulnerability_definitions/README.rst | 1 + pyt/web_frameworks/README.rst | 1 + 7 files changed, 7 insertions(+) create mode 100644 pyt/analysis/README.rst create mode 100644 pyt/cfg/README.rst create mode 100644 pyt/core/README.rst create mode 100644 pyt/helper_visitors/README.rst create mode 100644 pyt/vulnerabilities/README.rst create mode 100644 pyt/vulnerability_definitions/README.rst create mode 100644 pyt/web_frameworks/README.rst diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/analysis/README.rst @@ -0,0 +1 @@ +Coming soon. diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/cfg/README.rst @@ -0,0 +1 @@ +Coming soon. diff --git a/pyt/core/README.rst b/pyt/core/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/core/README.rst @@ -0,0 +1 @@ +Coming soon. diff --git a/pyt/helper_visitors/README.rst b/pyt/helper_visitors/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/helper_visitors/README.rst @@ -0,0 +1 @@ +Coming soon. diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/vulnerabilities/README.rst @@ -0,0 +1 @@ +Coming soon. diff --git a/pyt/vulnerability_definitions/README.rst b/pyt/vulnerability_definitions/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/vulnerability_definitions/README.rst @@ -0,0 +1 @@ +Coming soon. diff --git a/pyt/web_frameworks/README.rst b/pyt/web_frameworks/README.rst new file mode 100644 index 00000000..3ba5b13c --- /dev/null +++ b/pyt/web_frameworks/README.rst @@ -0,0 +1 @@ +Coming soon. 
From f4ebbffe64fc39961b34f4876927d310c1dc725a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 27 Apr 2018 11:40:40 +0300 Subject: [PATCH 068/291] Update vulnerabilities.py --- pyt/vulnerabilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index ecf76e78..b9a0c96b 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -74,7 +74,7 @@ def identify_triggers( sources, sinks, lattice, - nosec_lines + nosec_lines = set() ): """Identify sources, sinks and sanitisers in a CFG. From 7c872a039d0817868587be95c6caf1e916cee5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 27 Apr 2018 11:41:52 +0300 Subject: [PATCH 069/291] Update vulnerabilities.py --- pyt/vulnerabilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index b9a0c96b..d5bc2e1b 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -509,7 +509,7 @@ def find_vulnerabilities( analysis_type, ui_mode, vulnerability_files, - nosec_lines + nosec_lines = set() ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. From 0f0140448a6cbf0c7a0038d92a960cdbc9686396 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 27 Apr 2018 18:21:46 -0700 Subject: [PATCH 070/291] [many readme] start to write a bit --- pyt/README.rst | 56 +++++++++++++++++++++++++++++++++++ pyt/analysis/README.rst | 2 ++ pyt/cfg/README.rst | 19 +++++++++++- pyt/cfg/make_cfg.py | 4 +-- pyt/web_frameworks/README.rst | 4 +++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 pyt/README.rst diff --git a/pyt/README.rst b/pyt/README.rst new file mode 100644 index 00000000..90ea58c6 --- /dev/null +++ b/pyt/README.rst @@ -0,0 +1,56 @@ +`__main__.py`_ is where all the high-level steps happen. + +.. 
_\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py + +Step 1 + Parse command line arguments. + + `parse_args` with `usage.py`_ + + .. _usage.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py + + +Step 2 + Generate the Abstract Syntax Tree (AST). + + Essentially done in these lines of code with the `ast`_ module: + + .. code-block:: python + + import ast + ast.parse(f.read()) + + `generate_ast`_ from `ast_helper.py`_ + + .. _ast: https://docs.python.org/3/library/ast.html + .. _generate_ast: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/ast_helper.py#L24 + .. _ast_helper.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/ast_helper.py + + +Step 3 + Pass the AST to create a `Control Flow Graph (CFG)`_ + + .. _Control Flow Graph (CFG): https://github.com/python-security/pyt/tree/re_organize_code/pyt/cfg + +Step 4 + Pass the CFG to a `Framework Adaptor`_, which will mark the arguments of certain functions as tainted sources. + + .. _Framework Adaptor: https://github.com/python-security/pyt/tree/re_organize_code/pyt/web_frameworks + +Step 5 + Perform (modified-)reaching definitions `analysis`_, to know where definitions reach. + + .. _analysis: https://github.com/python-security/pyt/tree/re_organize_code/pyt/analysis + +Step 6 + Find `vulnerabilities`_, by seeing where sources reach, and how. + + .. _vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities + +Step 7 + Remove the already vulnerabilities if a baseline (JSON file of a previous run of PyT) is provided. + +Step 8 + Output the results in either `text or JSON form`_, to stdout or the outfile. + + .. 
_text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst index 3ba5b13c..8f9883b0 100644 --- a/pyt/analysis/README.rst +++ b/pyt/analysis/README.rst @@ -1 +1,3 @@ Coming soon. + +This folder probably won't change at all for the lifetime of the project, unless we were to implement more advanced analyses like solving string constraints or doing alias analysis, right now and in the foreseeable future there are more pressing concerns, like handling web frameworks and handling all ast node types in the CFG construction. diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 3ba5b13c..a649a944 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1 +1,18 @@ -Coming soon. +These modules mirror the `abstract grammar`_ of Python. + +.. _abstract grammar: https://docs.python.org/3/library/ast.html#abstract-grammar + + +Dive into the raw ast NodeVisitor code. + + +Statements can contain expressions, but not the other way around, +so it was natural to have ExprVisitor inherit from StmtVisitor. + + +TODO: stmt_star_handler and expr_star_handler explanations and walk throughs. + + +For more information on AST nodes, see the `Green Tree Snakes`_ documentation. + +.. _Green Tree Snakes: https://greentreesnakes.readthedocs.io/en/latest/nodes.html diff --git a/pyt/cfg/make_cfg.py b/pyt/cfg/make_cfg.py index 710c5f66..eaa78c9b 100644 --- a/pyt/cfg/make_cfg.py +++ b/pyt/cfg/make_cfg.py @@ -20,14 +20,14 @@ def __str__(self): def make_cfg( - node, + tree, project_modules, local_modules, filename, module_definitions=None ): visitor = ExprVisitor( - node, + tree, project_modules, local_modules, filename, module_definitions diff --git a/pyt/web_frameworks/README.rst b/pyt/web_frameworks/README.rst index 3ba5b13c..aa1b121b 100644 --- a/pyt/web_frameworks/README.rst +++ b/pyt/web_frameworks/README.rst @@ -1 +1,5 @@ Coming soon. 
+ + +Web frameworks +Sorry state of affairs From febb605096a2bc10f09ed6a7dd4e46626b31c44c Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 18:30:42 -0700 Subject: [PATCH 071/291] Better links/Grammar --- pyt/README.rst | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyt/README.rst b/pyt/README.rst index 90ea58c6..d69b8e72 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -5,8 +5,9 @@ Step 1 Parse command line arguments. - `parse_args` with `usage.py`_ + `parse_args`_ in `usage.py`_ + .. _parse_args: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L113 .. _usage.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py @@ -20,7 +21,7 @@ Step 2 import ast ast.parse(f.read()) - `generate_ast`_ from `ast_helper.py`_ + `generate_ast`_ in `ast_helper.py`_ .. _ast: https://docs.python.org/3/library/ast.html .. _generate_ast: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/ast_helper.py#L24 @@ -48,9 +49,13 @@ Step 6 .. _vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities Step 7 - Remove the already vulnerabilities if a baseline (JSON file of a previous run of PyT) is provided. + `Remove already known vulnerabilities`_ if a `baseline`_ (JSON file of a previous run of PyT) is provided. + + .. _Remove already known vulnerabilities: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py#L194 + .. _baseline: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L54 Step 8 - Output the results in either `text or JSON form`_, to stdout or the outfile. + Output the results in either `text or JSON form`_, to stdout or the `output file`_. .. _text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters + .. 
_output file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L80 From 780b48d109d18af4886913501553675cba1cc9ad Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 18:33:00 -0700 Subject: [PATCH 072/291] Update README.rst --- pyt/README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyt/README.rst b/pyt/README.rst index d69b8e72..611002a5 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -39,14 +39,14 @@ Step 4 .. _Framework Adaptor: https://github.com/python-security/pyt/tree/re_organize_code/pyt/web_frameworks Step 5 - Perform (modified-)reaching definitions `analysis`_, to know where definitions reach. + Perform `(modified-)reaching definitions analysis`_, to know where definitions reach. - .. _analysis: https://github.com/python-security/pyt/tree/re_organize_code/pyt/analysis + .. _\(modified\-\)reaching definitions analysis: https://github.com/python-security/pyt/tree/re_organize_code/pyt/analysis Step 6 - Find `vulnerabilities`_, by seeing where sources reach, and how. + `Find vulnerabilities`_, by seeing where sources reach, and how. - .. _vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities + .. _Find vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities Step 7 `Remove already known vulnerabilities`_ if a `baseline`_ (JSON file of a previous run of PyT) is provided. From 7acd6e8f34cc470e93aa9fa3dad47a440b9960a7 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 18:48:04 -0700 Subject: [PATCH 073/291] Replace Install section with pip install --- README.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 65084da8..874a0c95 100644 --- a/README.rst +++ b/README.rst @@ -52,11 +52,9 @@ Example usage and output: Install ======= - 1. git clone https://github.com/python-security/pyt.git - 2. cd pyt/ - 3. python3 setup.py install - 4. 
pyt -h +.. code-block:: python + pip install python-taint Usage from Source ================= From e02aa41dfccccc937e624b297bcd9fa0046c17f7 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 27 Apr 2018 18:56:35 -0700 Subject: [PATCH 074/291] Added more to pyt/README.rst --- pyt/README.rst | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pyt/README.rst b/pyt/README.rst index 90ea58c6..611002a5 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -5,8 +5,9 @@ Step 1 Parse command line arguments. - `parse_args` with `usage.py`_ + `parse_args`_ in `usage.py`_ + .. _parse_args: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L113 .. _usage.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py @@ -20,7 +21,7 @@ Step 2 import ast ast.parse(f.read()) - `generate_ast`_ from `ast_helper.py`_ + `generate_ast`_ in `ast_helper.py`_ .. _ast: https://docs.python.org/3/library/ast.html .. _generate_ast: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/ast_helper.py#L24 @@ -38,19 +39,23 @@ Step 4 .. _Framework Adaptor: https://github.com/python-security/pyt/tree/re_organize_code/pyt/web_frameworks Step 5 - Perform (modified-)reaching definitions `analysis`_, to know where definitions reach. + Perform `(modified-)reaching definitions analysis`_, to know where definitions reach. - .. _analysis: https://github.com/python-security/pyt/tree/re_organize_code/pyt/analysis + .. _\(modified\-\)reaching definitions analysis: https://github.com/python-security/pyt/tree/re_organize_code/pyt/analysis Step 6 - Find `vulnerabilities`_, by seeing where sources reach, and how. + `Find vulnerabilities`_, by seeing where sources reach, and how. - .. _vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities + .. 
_Find vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities Step 7 - Remove the already vulnerabilities if a baseline (JSON file of a previous run of PyT) is provided. + `Remove already known vulnerabilities`_ if a `baseline`_ (JSON file of a previous run of PyT) is provided. + + .. _Remove already known vulnerabilities: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py#L194 + .. _baseline: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L54 Step 8 - Output the results in either `text or JSON form`_, to stdout or the outfile. + Output the results in either `text or JSON form`_, to stdout or the `output file`_. .. _text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters + .. _output file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L80 From 49915aab523ee27813466334b4fbd4fb38b1af17 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 27 Apr 2018 18:59:23 -0700 Subject: [PATCH 075/291] Replace trigger word with source and sink --- pyt/vulnerabilities/vulnerability_helper.py | 4 +- .../vulnerabilities_across_files_test.py | 32 +++++------ tests/vulnerabilities/vulnerabilities_test.py | 56 +++++++++---------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index bd2407dd..1104de13 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -68,9 +68,9 @@ def __str__(self): reassigned_str = _get_reassignment_str(self.reassignment_nodes) return ( 'File: {}\n' - ' > User input at line {}, trigger word "{}":\n' + ' > User input at line {}, source "{}":\n' '\t {}{}\nFile: {}\n' - ' > reaches line {}, trigger word "{}":\n' + ' > reaches line {}, sink "{}":\n' '\t{}'.format( self.source.path, self.source.line_number, 
self.source_trigger_word, diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index b33982ab..d8bd3840 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -65,7 +65,7 @@ def test_blackbox_library_call(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code_across_files/blackbox_library_call.py - > User input at line 12, trigger word "request.args.get(": + > User input at line 12, source "request.args.get(": ~call_1 = ret_request.args.get('suggestion') Reassigned in: File: examples/vulnerable_code_across_files/blackbox_library_call.py @@ -77,7 +77,7 @@ def test_blackbox_library_call(self): File: examples/vulnerable_code_across_files/blackbox_library_call.py > Line 16: hey = command File: examples/vulnerable_code_across_files/blackbox_library_call.py - > reaches line 17, trigger word "subprocess.call(": + > reaches line 17, sink "subprocess.call(": ~call_3 = ret_subprocess.call(hey, shell=True) This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') """ @@ -90,7 +90,7 @@ def test_builtin_with_user_defined_inner(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > User input at line 16, trigger word "form[": + > User input at line 16, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/builtin_with_user_defined_inner.py @@ -112,7 +112,7 @@ def test_builtin_with_user_defined_inner(self): File: examples/nested_functions_code/builtin_with_user_defined_inner.py > Line 19: foo = ~call_1 File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > reaches line 20, trigger word 
"subprocess.call(": + > reaches line 20, sink "subprocess.call(": ~call_3 = ret_subprocess.call(foo, shell=True) This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) """ @@ -124,7 +124,7 @@ def test_sink_with_result_of_blackbox_nested(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > User input at line 12, trigger word "form[": + > User input at line 12, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py @@ -134,13 +134,13 @@ def test_sink_with_result_of_blackbox_nested(self): File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py > Line 13: result = ~call_1 File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > reaches line 14, trigger word "subprocess.call(": + > reaches line 14, sink "subprocess.call(": ~call_3 = ret_subprocess.call(result, shell=True) This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt(req_param) """ OTHER_EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > User input at line 12, trigger word "form[": + > User input at line 12, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py @@ -150,7 +150,7 @@ def test_sink_with_result_of_blackbox_nested(self): File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py > Line 13: result = ~call_1 File: examples/nested_functions_code/sink_with_result_of_blackbox_nested.py - > reaches line 14, trigger word "subprocess.call(": + > reaches line 14, sink "subprocess.call(": ~call_3 = ret_subprocess.call(result, shell=True) This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) """ @@ -165,7 
+165,7 @@ def test_sink_with_result_of_user_defined_nested(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > User input at line 16, trigger word "form[": + > User input at line 16, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py @@ -199,7 +199,7 @@ def test_sink_with_result_of_user_defined_nested(self): File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > Line 17: result = ~call_1 File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > reaches line 18, trigger word "subprocess.call(": + > reaches line 18, sink "subprocess.call(": ~call_3 = ret_subprocess.call(result, shell=True) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) @@ -210,7 +210,7 @@ def test_sink_with_blackbox_inner(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_blackbox_inner.py - > User input at line 12, trigger word "form[": + > User input at line 12, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_blackbox_inner.py @@ -218,14 +218,14 @@ def test_sink_with_blackbox_inner(self): File: examples/nested_functions_code/sink_with_blackbox_inner.py > Line 14: ~call_2 = ret_scrypt.encypt(~call_3) File: examples/nested_functions_code/sink_with_blackbox_inner.py - > reaches line 14, trigger word "subprocess.call(": + > reaches line 14, sink "subprocess.call(": ~call_1 = ret_subprocess.call(~call_2, shell=True) This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encypt(~call_3) """ OTHER_EXPECTED_VULNERABILITY_DESCRIPTION = """ File: 
examples/nested_functions_code/sink_with_blackbox_inner.py - > User input at line 12, trigger word "form[": + > User input at line 12, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_blackbox_inner.py @@ -233,7 +233,7 @@ def test_sink_with_blackbox_inner(self): File: examples/nested_functions_code/sink_with_blackbox_inner.py > Line 14: ~call_2 = ret_scrypt.encypt(~call_3) File: examples/nested_functions_code/sink_with_blackbox_inner.py - > reaches line 14, trigger word "subprocess.call(": + > reaches line 14, sink "subprocess.call(": ~call_1 = ret_subprocess.call(~call_2, shell=True) This vulnerability is unknown due to: Label: ~call_3 = ret_scrypt.encypt(req_param) """ @@ -248,7 +248,7 @@ def test_sink_with_user_defined_inner(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_user_defined_inner.py - > User input at line 16, trigger word "form[": + > User input at line 16, source "form[": req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_user_defined_inner.py @@ -280,7 +280,7 @@ def test_sink_with_user_defined_inner(self): File: examples/nested_functions_code/sink_with_user_defined_inner.py > Line 18: ~call_2 = ret_outer File: examples/nested_functions_code/sink_with_user_defined_inner.py - > reaches line 18, trigger word "subprocess.call(": + > reaches line 18, sink "subprocess.call(": ~call_1 = ret_subprocess.call(~call_2, shell=True) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index a1d48fd1..440f492e 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -148,7 +148,7 @@ def test_XSS_result(self): 
vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS.py - > User input at line 6, trigger word "request.args.get(": + > User input at line 6, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS.py @@ -160,7 +160,7 @@ def test_XSS_result(self): File: examples/vulnerable_code/XSS.py > Line 10: ret_XSS1 = resp File: examples/vulnerable_code/XSS.py - > reaches line 9, trigger word "replace(": + > reaches line 9, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', param) """ @@ -172,13 +172,13 @@ def test_command_injection_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/command_injection.py - > User input at line 15, trigger word "form[": + > User input at line 15, source "form[": param = request.form['suggestion'] Reassigned in: File: examples/vulnerable_code/command_injection.py > Line 16: command = 'echo ' + param + ' >> ' + 'menu.txt' File: examples/vulnerable_code/command_injection.py - > reaches line 18, trigger word "subprocess.call(": + > reaches line 18, sink "subprocess.call(": ~call_1 = ret_subprocess.call(command, shell=True) """ @@ -190,7 +190,7 @@ def test_path_traversal_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/path_traversal.py - > User input at line 15, trigger word "request.args.get(": + > User input at line 15, source "request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/path_traversal.py @@ -216,7 +216,7 @@ def test_path_traversal_result(self): File: examples/vulnerable_code/path_traversal.py > Line 19: foo = ~call_2 File: examples/vulnerable_code/path_traversal.py - > reaches line 20, trigger word "send_file(": + > reaches line 20, sink "send_file(": 
~call_4 = ret_send_file(foo) """ @@ -228,7 +228,7 @@ def test_ensure_saved_scope(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/ensure_saved_scope.py - > User input at line 15, trigger word "request.args.get(": + > User input at line 15, source "request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/ensure_saved_scope.py @@ -254,7 +254,7 @@ def test_ensure_saved_scope(self): File: examples/vulnerable_code/ensure_saved_scope.py > Line 19: foo = ~call_2 File: examples/vulnerable_code/ensure_saved_scope.py - > reaches line 20, trigger word "send_file(": + > reaches line 20, sink "send_file(": ~call_4 = ret_send_file(image_name) """ @@ -266,7 +266,7 @@ def test_path_traversal_sanitised_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/path_traversal_sanitised.py - > User input at line 8, trigger word "request.args.get(": + > User input at line 8, source "request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/path_traversal_sanitised.py @@ -280,7 +280,7 @@ def test_path_traversal_sanitised_result(self): File: examples/vulnerable_code/path_traversal_sanitised.py > Line 12: ret_cat_picture = ~call_3 File: examples/vulnerable_code/path_traversal_sanitised.py - > reaches line 12, trigger word "send_file(": + > reaches line 12, sink "send_file(": ~call_3 = ret_send_file(~call_4) This vulnerability is sanitised by: Label: ~call_2 = ret_image_name.replace('..', '') """ @@ -293,7 +293,7 @@ def test_path_traversal_sanitised_2_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/path_traversal_sanitised_2.py - > User input at line 8, trigger word "request.args.get(": + > User input at line 8, source 
"request.args.get(": ~call_1 = ret_request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/path_traversal_sanitised_2.py @@ -303,7 +303,7 @@ def test_path_traversal_sanitised_2_result(self): File: examples/vulnerable_code/path_traversal_sanitised_2.py > Line 12: ret_cat_picture = ~call_2 File: examples/vulnerable_code/path_traversal_sanitised_2.py - > reaches line 12, trigger word "send_file(": + > reaches line 12, sink "send_file(": ~call_2 = ret_send_file(~call_3) This vulnerability is potentially sanitised by: Label: if '..' in image_name: """ @@ -316,7 +316,7 @@ def test_sql_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/sql/sqli.py - > User input at line 26, trigger word "request.args.get(": + > User input at line 26, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/sql/sqli.py @@ -324,7 +324,7 @@ def test_sql_result(self): File: examples/vulnerable_code/sql/sqli.py > Line 27: result = ~call_2 File: examples/vulnerable_code/sql/sqli.py - > reaches line 27, trigger word "execute(": + > reaches line 27, sink "execute(": ~call_2 = ret_db.engine.execute(param) """ @@ -336,7 +336,7 @@ def test_XSS_form_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_form.py - > User input at line 14, trigger word "form[": + > User input at line 14, source "form[": data = request.form['my_text'] Reassigned in: File: examples/vulnerable_code/XSS_form.py @@ -346,7 +346,7 @@ def test_XSS_form_result(self): File: examples/vulnerable_code/XSS_form.py > Line 17: ret_example2_action = resp File: examples/vulnerable_code/XSS_form.py - > reaches line 15, trigger word "replace(": + > reaches line 15, sink "replace(": ~call_2 = ret_html1.replace('{{ data }}', data) """ @@ -358,7 +358,7 @@ def 
test_XSS_url_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_url.py - > User input at line 4, trigger word "Framework function URL parameter": + > User input at line 4, source "Framework function URL parameter": url Reassigned in: File: examples/vulnerable_code/XSS_url.py @@ -370,7 +370,7 @@ def test_XSS_url_result(self): File: examples/vulnerable_code/XSS_url.py > Line 10: ret_XSS1 = resp File: examples/vulnerable_code/XSS_url.py - > reaches line 9, trigger word "replace(": + > reaches line 9, sink "replace(": ~call_3 = ret_html.replace('{{ param }}', param) """ @@ -386,7 +386,7 @@ def test_XSS_reassign_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_reassign.py - > User input at line 6, trigger word "request.args.get(": + > User input at line 6, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_reassign.py @@ -400,7 +400,7 @@ def test_XSS_reassign_result(self): File: examples/vulnerable_code/XSS_reassign.py > Line 12: ret_XSS1 = resp File: examples/vulnerable_code/XSS_reassign.py - > reaches line 11, trigger word "replace(": + > reaches line 11, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', param) """ @@ -412,7 +412,7 @@ def test_XSS_sanitised_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_sanitised.py - > User input at line 7, trigger word "request.args.get(": + > User input at line 7, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_sanitised.py @@ -428,7 +428,7 @@ def test_XSS_sanitised_result(self): File: examples/vulnerable_code/XSS_sanitised.py > Line 13: ret_XSS1 = resp File: 
examples/vulnerable_code/XSS_sanitised.py - > reaches line 12, trigger word "replace(": + > reaches line 12, sink "replace(": ~call_5 = ret_html.replace('{{ param }}', param) This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) """ @@ -445,7 +445,7 @@ def test_XSS_variable_assign_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_variable_assign.py - > User input at line 6, trigger word "request.args.get(": + > User input at line 6, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_variable_assign.py @@ -459,7 +459,7 @@ def test_XSS_variable_assign_result(self): File: examples/vulnerable_code/XSS_variable_assign.py > Line 12: ret_XSS1 = resp File: examples/vulnerable_code/XSS_variable_assign.py - > reaches line 11, trigger word "replace(": + > reaches line 11, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', other_var) """ @@ -471,7 +471,7 @@ def test_XSS_variable_multiple_assign_result(self): vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_variable_multiple_assign.py - > User input at line 6, trigger word "request.args.get(": + > User input at line 6, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_variable_multiple_assign.py @@ -489,7 +489,7 @@ def test_XSS_variable_multiple_assign_result(self): File: examples/vulnerable_code/XSS_variable_multiple_assign.py > Line 17: ret_XSS1 = resp File: examples/vulnerable_code/XSS_variable_multiple_assign.py - > reaches line 15, trigger word "replace(": + > reaches line 15, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', another_one) """ @@ -526,13 +526,13 @@ def test_django_view_param(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: 
examples/vulnerable_code/django_XSS.py - > User input at line 4, trigger word "Framework function URL parameter": + > User input at line 4, source "Framework function URL parameter": param Reassigned in: File: examples/vulnerable_code/django_XSS.py > Line 5: ret_xss1 = ~call_1 File: examples/vulnerable_code/django_XSS.py - > reaches line 5, trigger word "render(": + > reaches line 5, sink "render(": ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) From f87539864dc0188f5c3beee3e053f83f643457c9 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:28:59 -0700 Subject: [PATCH 076/291] Update README.rst --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 874a0c95..cfe767b0 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,12 @@ Install pip install python-taint +PyT can also be installed from source. To do so, clone the repo, and then install it: + +.. code-block:: python + + python3 setup.py install + Usage from Source ================= From 3bf405bd4a21bed9c595105400c448c56ef51540 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:29:52 -0700 Subject: [PATCH 077/291] Add from source install instructions --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 874a0c95..cfe767b0 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,12 @@ Install pip install python-taint +PyT can also be installed from source. To do so, clone the repo, and then install it: + +.. 
code-block:: python + + python3 setup.py install + Usage from Source ================= From df2402ca6244783c75c611c4cbbfcb3e5bb1f8ab Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 27 Apr 2018 19:34:08 -0700 Subject: [PATCH 078/291] Add usage to root README.rst --- README.rst | 111 ++++++++++++++++++----------------------------------- 1 file changed, 38 insertions(+), 73 deletions(-) diff --git a/README.rst b/README.rst index 874a0c95..914c486e 100644 --- a/README.rst +++ b/README.rst @@ -25,19 +25,7 @@ Static analysis of Python web applications based on theoretical foundations (Con Features -------- -* Detect Command injection - -* Detect SQL injection - -* Detect XSS - -* Detect directory traversal - -* Get a control flow graph - -* Get a def-use and/or a use-def chain - -* Search GitHub and analyse hits with PyT +* Detect command injection, SSRF, SQL injection, XSS, directory traversal etc. * A lot of customisation possible @@ -56,67 +44,44 @@ Install pip install python-taint -Usage from Source -================= - -Using it like a user ``python3 -m pyt -f example/vulnerable_code/XSS_call.py save -du`` - -Running the tests ``python3 -m tests`` - -Running an individual test file ``python3 -m unittest tests.import_test`` - -Running an individual test ``python3 -m unittest tests.import_test.ImportTest.test_import`` - - -Contributions -============= - -Join our slack group: https://pyt-dev.slack.com/ - ask for invite: mr.thalmann@gmail.com - -`Guidelines`_ - -.. _Guidelines: https://github.com/python-security/pyt/blob/master/CONTRIBUTIONS.md - +PyT can also be installed from source.
To do so, clone the repo, and then install it: -Virtual env setup guide -======================= - -Create a directory to hold the virtual env and project - -``mkdir ~/a_folder`` - -``cd ~/a_folder`` - -Clone the project into the directory - -``git clone https://github.com/python-security/pyt.git`` - -Create the virtual environment - -``python3 -m venv ~/a_folder/`` - -Check that you have the right versions - -``python3 --version`` sample output ``Python 3.6.0`` - -``pip --version`` sample output ``pip 9.0.1 from /Users/kevinhock/a_folder/lib/python3.6/site-packages (python 3.6)`` - -Change to project directory - -``cd pyt`` - -Install dependencies - -``pip install -r requirements.txt`` +.. code-block:: python -``pip list`` sample output :: + python3 setup.py install - gitdb (0.6.4) - GitPython (2.0.8) - graphviz (0.4.10) - pip (9.0.1) - requests (2.10.0) - setuptools (28.8.0) - smmap (0.9.0) +Usage +======= -In the future, just type ``source ~/a_folder/bin/activate`` to start developing. + usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] + [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] + + required arguments: + -f FILEPATH, --filepath FILEPATH + Path to the file that should be analysed. + + optional arguments: + -a ADAPTOR, --adaptor ADAPTOR + Choose a web framework adaptor: Flask(Default), + Django, Every or Pylons + -pr PROJECT_ROOT, --project-root PROJECT_ROOT + Add project root, only important when the entry file + is not at the root of the project. + -b BASELINE_JSON_FILE, --baseline BASELINE_JSON_FILE + Path of a baseline report to compare against (only + JSON-formatted files are accepted) + -j, --json Prints JSON instead of report. + -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE + Input blackbox mapping file. 
+ -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE + Input file with a list of sources and sinks + -o OUTPUT_FILE, --output OUTPUT_FILE + write report to filename + + print arguments: + -trim, --trim-reassigned-in + Trims the reassigned list to just the vulnerability + chain. + -i, --interactive Will ask you about each blackbox function call in + vulnerability chains. From 9e44237106e420497fdcb188f83a3cca246b1615 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:37:44 -0700 Subject: [PATCH 079/291] Update README.rst --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 2ca0a05d..8a233211 100644 --- a/README.rst +++ b/README.rst @@ -53,6 +53,7 @@ PyT can also be installed from source. To do so, clone the repo, and then instal Usage ======= +Usage:: usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] From 387e60eaa37efa8e79518bf4a7fbd7d7e44a538c Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:40:01 -0700 Subject: [PATCH 080/291] Update README.rst --- README.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 8a233211..7016b0b2 100644 --- a/README.rst +++ b/README.rst @@ -50,10 +50,8 @@ PyT can also be installed from source. 
To do so, clone the repo, and then instal python3 setup.py install -Usage -======= -Usage:: + usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] From 03e2622c77bbbc4f732e18d9b8d71dae18c102b8 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:40:39 -0700 Subject: [PATCH 081/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 7016b0b2..6fa8b0a5 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ PyT can also be installed from source. To do so, clone the repo, and then instal python3 setup.py install - +.. code-block:: usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] From 8f68b3e81f06f3058fe5a3926b8d613240287a27 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:42:39 -0700 Subject: [PATCH 082/291] Update README.rst --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 6fa8b0a5..1ca64f95 100644 --- a/README.rst +++ b/README.rst @@ -50,6 +50,9 @@ PyT can also be installed from source. To do so, clone the repo, and then instal python3 setup.py install +Usage +===== + .. code-block:: usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] From c8a0f32dca93df593ad2a3780722e022cbc0898f Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 27 Apr 2018 19:43:19 -0700 Subject: [PATCH 083/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1ca64f95..d35eea1b 100644 --- a/README.rst +++ b/README.rst @@ -44,7 +44,7 @@ Install pip install python-taint -PyT can also be installed from source. To do so, clone the repo, and then install it: +PyT can also be installed from source. To do so, clone the repo, and then run: .. 
code-block:: python From 28004b1ff94aabef42a64174b112d237d2f27c7e Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Apr 2018 11:24:08 -0700 Subject: [PATCH 084/291] Update README.rst --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index d35eea1b..b97dfe6d 100644 --- a/README.rst +++ b/README.rst @@ -19,6 +19,8 @@ Python Taint ============ +.. class:: center + Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis) -------- From 1a7be1486dba15d029cf03338fecbd356be59095 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Apr 2018 11:24:29 -0700 Subject: [PATCH 085/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index b97dfe6d..c48c4d64 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,7 @@ Python Taint .. class:: center -Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis) + Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis) -------- Features From b94331006a4558bae6709f8fd603944d1cdb0161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 28 Apr 2018 21:26:09 +0300 Subject: [PATCH 086/291] removed spaces --- pyt/vulnerabilities.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index d5bc2e1b..4dfe06ed 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -74,7 +74,7 @@ def identify_triggers( sources, sinks, lattice, - nosec_lines = set() + nosec_lines=set() ): """Identify sources, sinks and sanitisers in a CFG. 
@@ -172,7 +172,7 @@ def append_node_if_reassigned( def find_triggers( nodes, trigger_words, - nosec_lines = set() + nosec_lines=set() ): """Find triggers from the trigger_word_list in the nodes. @@ -470,7 +470,7 @@ def find_vulnerabilities_in_cfg( ui_mode, blackbox_mapping, vulnerabilities_list, - nosec_lines = set() + nosec_lines ): """Find vulnerabilities in a cfg. @@ -509,7 +509,7 @@ def find_vulnerabilities( analysis_type, ui_mode, vulnerability_files, - nosec_lines = set() + nosec_lines=set() ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. From 11ce535b6225435cf86944ae184d5ac0708c6fbb Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Apr 2018 11:26:47 -0700 Subject: [PATCH 087/291] Update README.rst --- README.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.rst b/README.rst index c48c4d64..f2cd2a01 100644 --- a/README.rst +++ b/README.rst @@ -19,9 +19,7 @@ Python Taint ============ -.. class:: center - - Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis) +

Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis)

-------- Features From cd011d7e2f9edb7803326121808e87fe2bf3a28f Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Apr 2018 11:27:01 -0700 Subject: [PATCH 088/291] Update README.rst --- README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index f2cd2a01..6a5a4a2b 100644 --- a/README.rst +++ b/README.rst @@ -19,7 +19,9 @@ Python Taint ============ -

Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis)

+

+Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis) +

-------- Features From fd3d74a6810672d869ccd5639c80e66682156ddc Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Apr 2018 11:27:17 -0700 Subject: [PATCH 089/291] Update README.rst --- README.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.rst b/README.rst index 6a5a4a2b..d35eea1b 100644 --- a/README.rst +++ b/README.rst @@ -19,9 +19,7 @@ Python Taint ============ -

Static analysis of Python web applications based on theoretical foundations (Control flow graphs, fixed point, dataflow analysis) -

-------- Features From 175c23597706efab4826154222e15ed73a002a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 28 Apr 2018 21:27:31 +0300 Subject: [PATCH 090/291] new line between imports and codes --- tests/vulnerabilities_across_files_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index 25dd1267..7492aee2 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -15,6 +15,7 @@ from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from pyt.vulnerabilities import find_vulnerabilities + class EngineTest(BaseTestCase): def run_analysis(self, path): path = os.path.normpath(path) From c79161cb5416e32bf130ec4ea56f532a9eb8078e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 28 Apr 2018 21:32:33 +0300 Subject: [PATCH 091/291] Update __main__.py --- pyt/__main__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 8ce6cc67..b81e6f03 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -143,8 +143,7 @@ def parse_args(args): type=str, default=False) parser.add_argument('--ignore-nosec', dest='ignore_nosec', action='http://github.com/store_true', - help='do not skip lines with # nosec comments' - ) + help='do not skip lines with # nosec comments') save_parser = subparsers.add_parser('save', help='Save menu.') save_parser.set_defaults(which='save') @@ -239,7 +238,7 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM - path = os.path.normpath(args.filepath) + path = os.path.normpath(args.filepath) cfg_list = list() if args.ignore_nosec: nosec_lines = set() @@ -249,8 +248,8 @@ def main(command_line_args=sys.argv[1:]): nosec_lines = set( lineno for (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) - 
+ if '#nosec' in line or '# nosec' in line) + if args.git_repos: repos = get_repos(args.git_repos) for repo in repos: From ed135144641ec398131866a35333626364857d05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 28 Apr 2018 21:35:07 +0300 Subject: [PATCH 092/291] new line between imports and codes --- tests/vulnerabilities_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index b4698e91..f3d77279 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -23,6 +23,7 @@ from pyt.node_types import Node from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis + class EngineTest(BaseTestCase): def run_empty(self): return From 092870010dfbf5368b9f974920f677ebb67b41a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 28 Apr 2018 21:36:16 +0300 Subject: [PATCH 093/291] removed set() from nosec_lines --- pyt/vulnerabilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 4dfe06ed..1bc57753 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -74,7 +74,7 @@ def identify_triggers( sources, sinks, lattice, - nosec_lines=set() + nosec_lines ): """Identify sources, sinks and sanitisers in a CFG. From 5b372d267efa8cccc75c998b5d3fe56f2904f116 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Apr 2018 14:41:23 -0700 Subject: [PATCH 094/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index cfe767b0..3883e331 100644 --- a/README.rst +++ b/README.rst @@ -56,7 +56,7 @@ Install pip install python-taint -PyT can also be installed from source. To do so, clone the repo, and then install it: +PyT can also be installed from source. To do so, clone the repo, and then run: .. 
code-block:: python From 272b76e6e520cfa8a733502c364f7258171e6b2d Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 28 Apr 2018 15:19:56 -0700 Subject: [PATCH 095/291] First attempt at analysis/README.rst --- pyt/analysis/README.rst | 67 +++++++++++++++++++++++- pyt/helper_visitors/README.rst | 2 +- pyt/vulnerability_definitions/README.rst | 2 +- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst index 8f9883b0..321401bd 100644 --- a/pyt/analysis/README.rst +++ b/pyt/analysis/README.rst @@ -1,3 +1,66 @@ -Coming soon. +This code is responsible for answering two questions: -This folder probably won't change at all for the lifetime of the project, unless we were to implement more advanced analyses like solving string constraints or doing alias analysis, right now and in the foreseeable future there are more pressing concerns, like handling web frameworks and handling all ast node types in the CFG construction. + +Where do definitions reach? +=========================== + +Traditionally `reaching definitions`_, a classic dataflow-analysis, +has been used to answer this question. To understand reaching definitions, +watch this `wonderful YouTube video`_ and come back here. +We use `reaching definitions`_, +with one small modification, a `reassignment check`_. + + +.. code-block:: python + + # Reassignment check + if cfg_node.left_hand_side not in cfg_node.right_hand_side_variables: + # Get previous assignments of cfg_node.left_hand_side and remove them from JOIN + arrow_result = self.arrow(JOIN, cfg_node.left_hand_side) + +As an example, + +.. code-block:: python + + image_name = request.args.get('image_name') + image_name = os.path.join(base_dir, image_name) + send_file(image_name) + +we still want to know that something from a request reached `send_file`. + + +.. _reaching definitions: https://en.wikipedia.org/wiki/Reaching_definition +.. 
_reassignment check: https://github.com/python-security/pyt/blob/re_organize_code/pyt/analysis/reaching_definitions_taint.py#L23-L26 +.. _wonderful YouTube video: https://www.youtube.com/watch?v=NVBQSR_HdL0 + + +How does a definition reach? +============================ + +After we know that a definition reaches a use that we are interested in +We make what are called `definition-use chains`_ figure out how that definition +reaches the use. This is necessary because there may be more than one path from +the definition to the use. + + +.. _definition-use chains: https://en.wikipedia.org/wiki/Use-define_chain + + +Additional details +================== + +This folder probably will not change at all for the lifetime of the project, +unless we were to implement more advanced analyses like `solving string +constraints`_ or doing `alias analysis`_. Right now and in the foreseeable +future there are more pressing concerns, like handling web frameworks +and handling all ast node types in the CFG construction. + +Stefan and Bruno like the `Schwartzbach notes`_, as you will see in some comments. +But looking these two algorithms up will yield countless results, my favorite is +this `amazing guy from YouTube`_. + + +.. _solving string constraints: https://zyh1121.github.io/z3str3Docs/inputLanguage.html +.. _alias analysis: https://www3.cs.stonybrook.edu/~liu/papers/Alias-DLS10.pdf +.. _Schwartzbach notes: http://lara.epfl.ch/w/_media/sav08:schwartzbach.pdf +.. _amazing guy from YouTube: https://www.youtube.com/watch?v=NVBQSR_HdL0 diff --git a/pyt/helper_visitors/README.rst b/pyt/helper_visitors/README.rst index 3ba5b13c..cce52a09 100644 --- a/pyt/helper_visitors/README.rst +++ b/pyt/helper_visitors/README.rst @@ -1 +1 @@ -Coming soon. +Documentation coming soon. 
diff --git a/pyt/vulnerability_definitions/README.rst b/pyt/vulnerability_definitions/README.rst index 3ba5b13c..cce52a09 100644 --- a/pyt/vulnerability_definitions/README.rst +++ b/pyt/vulnerability_definitions/README.rst @@ -1 +1 @@ -Coming soon. +Documentation coming soon. From ddb239f98f526f635bd4b12509fde4ed995cb1c0 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 28 Apr 2018 15:26:51 -0700 Subject: [PATCH 096/291] Update pyt/analysis/README.rst --- pyt/analysis/README.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst index 321401bd..d785ffdb 100644 --- a/pyt/analysis/README.rst +++ b/pyt/analysis/README.rst @@ -7,8 +7,8 @@ Where do definitions reach? Traditionally `reaching definitions`_, a classic dataflow-analysis, has been used to answer this question. To understand reaching definitions, watch this `wonderful YouTube video`_ and come back here. -We use `reaching definitions`_, -with one small modification, a `reassignment check`_. +We use reaching definitions, with one small modification, +a `reassignment check`_. .. code-block:: python @@ -37,8 +37,8 @@ we still want to know that something from a request reached `send_file`. How does a definition reach? ============================ -After we know that a definition reaches a use that we are interested in -We make what are called `definition-use chains`_ figure out how that definition +After we know that a definition reaches a use that we are interested in, +we make what are called `definition-use chains`_ figure out how the definition reaches the use. This is necessary because there may be more than one path from the definition to the use. @@ -53,7 +53,7 @@ This folder probably will not change at all for the lifetime of the project, unless we were to implement more advanced analyses like `solving string constraints`_ or doing `alias analysis`_. 
Right now and in the foreseeable future there are more pressing concerns, like handling web frameworks -and handling all ast node types in the CFG construction. +and handling all AST node types in the `CFG construction`_. Stefan and Bruno like the `Schwartzbach notes`_, as you will see in some comments. But looking these two algorithms up will yield countless results, my favorite is @@ -62,5 +62,6 @@ this `amazing guy from YouTube`_. .. _solving string constraints: https://zyh1121.github.io/z3str3Docs/inputLanguage.html .. _alias analysis: https://www3.cs.stonybrook.edu/~liu/papers/Alias-DLS10.pdf +.. _CFG construction: https://github.com/python-security/pyt/tree/re_organize_code/pyt/cfg .. _Schwartzbach notes: http://lara.epfl.ch/w/_media/sav08:schwartzbach.pdf .. _amazing guy from YouTube: https://www.youtube.com/watch?v=NVBQSR_HdL0 From e84d99e7689e977be163e28ea114479bd4e7ac40 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 09:52:57 -0700 Subject: [PATCH 097/291] Update README.rst --- pyt/cfg/README.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index a649a944..41b9bb99 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,5 +1,25 @@ +make_cfg is what __main__ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and return a Control Flow Graph. + +Statements can contain expressions, but not +ExprVisitor inherits from StmtVisitor, which inherits from `ast.NodeVisitor`_ from the standard library. + +https://github.com/python/cpython/blob/f2c1aa1661edb3e14ff8b7b9995f93e303c8acbb/Lib/ast.py#L249-L253 + +There is a `visit\_` function for almost every AST node type. 
+ +We keep track of all the nodes while we visit by adding them to self.nodes, + +The two most illustrative functions are stmt_star_handler + +Upon visiting an If statement we will enter visit_If, which will call stmt_star_handler, that returns a namedtuple ControlFlowNode with the first statement, last_statements and break_statements. + +In visit_call we will call expr_star_handler on the arguments, that returns a named_tuple with the + +We create the control flow graph of the program we are analyzing. + These modules mirror the `abstract grammar`_ of Python. +.. _ast.NodeVisitor: https://docs.python.org/3/library/ast.html#ast.NodeVisitor .. _abstract grammar: https://docs.python.org/3/library/ast.html#abstract-grammar From 554d08f45f7fdacb13483f9deb0ffbab9f27fb61 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 10:00:08 -0700 Subject: [PATCH 098/291] Update README.rst --- pyt/cfg/README.rst | 49 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 41b9bb99..47fb56c1 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,18 +1,57 @@ make_cfg is what __main__ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and return a Control Flow Graph. -Statements can contain expressions, but not -ExprVisitor inherits from StmtVisitor, which inherits from `ast.NodeVisitor`_ from the standard library. +Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) -https://github.com/python/cpython/blob/f2c1aa1661edb3e14ff8b7b9995f93e303c8acbb/Lib/ast.py#L249-L253 +.. 
code-block:: python + :caption: From ast.Nodevisitor + + def visit(self, node): + """Visit a node.""" + method = 'visit_' + node.__class__.__name__ + visitor = getattr(self, method, self.generic_visit) + return visitor(node) There is a `visit\_` function for almost every AST node type. -We keep track of all the nodes while we visit by adding them to self.nodes, +We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing` and `outgoing` node attributes. -The two most illustrative functions are stmt_star_handler +The two most illustrative functions are stmt_star_handler and expr_star_handler. Upon visiting an If statement we will enter visit_If, which will call stmt_star_handler, that returns a namedtuple ControlFlowNode with the first statement, last_statements and break_statements. +.. code-block:: python + + def visit_If(self, node): + test = self.append_node(IfNode( + node.test, + node, + path=self.filenames[-1] + )) + + body_connect_stmts = self.stmt_star_handler(node.body) + if isinstance(body_connect_stmts, IgnoredNode): + body_connect_stmts = ConnectStatements( + first_statement=test, + last_statements=[], + break_statements=[] + ) + test.connect(body_connect_stmts.first_statement) + + if node.orelse: + orelse_last_nodes = self.handle_or_else(node.orelse, test) + body_connect_stmts.last_statements.extend(orelse_last_nodes) + else: + body_connect_stmts.last_statements.append(test) # if there is no orelse, test needs an edge to the next_node + + last_statements = remove_breaks(body_connect_stmts.last_statements) + + return ControlFlowNode( + test, + last_statements, + break_statements=body_connect_stmts.break_statements + ) + + In visit_call we will call expr_star_handler on the arguments, that returns a named_tuple with the We create the control flow graph of the program we are analyzing. 
From 9d8459945767c171ec069ee8e8c1b577f608ec36 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 10:00:30 -0700 Subject: [PATCH 099/291] Update README.rst --- pyt/cfg/README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 47fb56c1..cf8ce430 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -2,6 +2,7 @@ make_cfg is what __main__ calls, it takes the Abstract Syntax Tree, creates an E Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) + .. code-block:: python :caption: From ast.Nodevisitor @@ -11,6 +12,7 @@ Statements can contain expressions, but not the other way around. This is why Ex visitor = getattr(self, method, self.generic_visit) return visitor(node) + There is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing` and `outgoing` node attributes. From ad0767cd589309a49617a1f64e2985eb70d6d0c7 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 10:00:47 -0700 Subject: [PATCH 100/291] Update README.rst --- pyt/cfg/README.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index cf8ce430..d35aaa46 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -4,7 +4,6 @@ Statements can contain expressions, but not the other way around. This is why Ex .. 
code-block:: python - :caption: From ast.Nodevisitor def visit(self, node): """Visit a node.""" From f86410d8aa7d47691a02fa5484ce37f148b3424d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 2 May 2018 00:56:32 +0300 Subject: [PATCH 101/291] Test for "try_orelse_with_no_variables_to_save" --- tests/cfg_test.py | 92 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/tests/cfg_test.py b/tests/cfg_test.py index f3a3a28a..ce73ccb9 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -1,5 +1,8 @@ from .base_test_case import BaseTestCase -from pyt.node_types import EntryOrExitNode, Node +from pyt.node_types import ( + EntryOrExitNode, + Node +) class CFGGeneralTest(BaseTestCase): @@ -215,35 +218,80 @@ def test_orelse(self): print_good = 18 _exit = 19 - self.assertInCfg([self.connected(entry, try_), + self.assertInCfg([ + self.connected(entry, try_), - self.connected(try_, try_body), + self.connected(try_, try_body), - self.connected(try_body, print_a5), + self.connected(try_body, print_a5), - self.connected(print_a5, except_im), - self.connected(print_a5, save_node), - self.connected(print_a5, print_good), + self.connected(print_a5, except_im), + self.connected(print_a5, save_node), + self.connected(print_a5, print_good), - self.connected(except_im, except_im_body_1), + self.connected(except_im, except_im_body_1), - self.connected(except_im_body_1, value_equal_call_2), - self.connected(value_equal_call_2, print_wagyu), + self.connected(except_im_body_1, value_equal_call_2), + self.connected(value_equal_call_2, print_wagyu), + + self.connected(print_wagyu, print_good), + + self.connected(save_node, assign_to_temp), + self.connected(assign_to_temp, assign_from_temp), + self.connected(assign_from_temp, function_entry), + self.connected(function_entry, ret_of_subprocess_call), + self.connected(ret_of_subprocess_call, ret_does_this_kill_us_equal_call_5), + 
self.connected(ret_does_this_kill_us_equal_call_5, function_exit), + self.connected(function_exit, restore_node), + self.connected(restore_node, return_handler), + self.connected(return_handler, print_so), + + self.connected(print_so, print_good), + self.connected(print_good, _exit) + ]) - self.connected(print_wagyu, print_good), + def test_orelse_with_no_variables_to_save(self): + self.cfg_create_from_file('examples/example_inputs/try_orelse_with_no_variables_to_save.py') - self.connected(save_node, assign_to_temp), - self.connected(assign_to_temp, assign_from_temp), - self.connected(assign_from_temp, function_entry), - self.connected(function_entry, ret_of_subprocess_call), - self.connected(ret_of_subprocess_call, ret_does_this_kill_us_equal_call_5), - self.connected(ret_does_this_kill_us_equal_call_5, function_exit), - self.connected(function_exit, restore_node), - self.connected(restore_node, return_handler), - self.connected(return_handler, print_so), + self.nodes = self.cfg_list_to_dict(self.cfg.nodes) + self.assert_length(self.cfg.nodes, expected_length=15) + + entry = 0 + try_ = 1 + print_a5 = 2 + except_im = 3 + print_wagyu = 4 + temp_3_diff = 5 + diff = 6 + function_entry = 7 + ret_subprocess_call = 8 + ret_does_this_kill_us_4 = 9 + exit_does_this_kill_us = 10 + ret_does_this_kill_us_3 = 11 + print_so = 12 + print_good = 13 + _exit = 14 + + self.assertInCfg([ + self.connected(entry, try_), + self.connected(try_, print_a5), + self.connected(print_a5, except_im), + self.connected(print_a5, temp_3_diff), + self.connected(print_a5, print_good), + self.connected(except_im, print_wagyu), + self.connected(print_wagyu, print_good), + self.connected(temp_3_diff, diff), + self.connected(diff, function_entry), + self.connected(function_entry, ret_subprocess_call), + self.connected(ret_subprocess_call, ret_does_this_kill_us_4), + self.connected(ret_does_this_kill_us_4, exit_does_this_kill_us), + self.connected(exit_does_this_kill_us, ret_does_this_kill_us_3), + 
self.connected(ret_does_this_kill_us_3, print_so), + self.connected(print_so, print_good), + self.connected(print_good, _exit), + + ]) - self.connected(print_so, print_good), - self.connected(print_good, _exit)]) def test_final(self): self.cfg_create_from_file('examples/example_inputs/try_final.py') From 54e9563a57c4273f58ae00dfb1306ce814ecbcf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 2 May 2018 01:23:56 +0300 Subject: [PATCH 102/291] created new test created "test_try_orelse_with_no_variables_to_save_and_no_args" --- tests/cfg_test.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/cfg_test.py b/tests/cfg_test.py index ce73ccb9..2f7ef192 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -292,6 +292,43 @@ def test_orelse_with_no_variables_to_save(self): ]) + def test_try_orelse_with_no_variables_to_save_and_no_args(self): + self.cfg_create_from_file('examples/example_inputs/try_orelse_with_no_variables_to_save_and_no_args.py') + + self.nodes = self.cfg_list_to_dict(self.cfg.nodes) + self.assert_length(self.cfg.nodes, expected_length=13) + + entry = 0 + try_ = 1 + print_a5 = 2 + except_im = 3 + print_wagyu = 4 + function_entry = 5 + ret_subprocess_call = 6 + ret_does_this_kill_us_4 = 7 + exit_does_this_kill_us = 8 + ret_does_this_kill_us_3 = 9 + print_so = 10 + print_good = 11 + _exit = 12 + + self.assertInCfg([ + self.connected(entry, try_), + self.connected(try_, print_a5), + self.connected(print_a5, except_im), + self.connected(print_a5, function_entry), + self.connected(print_a5, print_good), + self.connected(except_im, print_wagyu), + self.connected(print_wagyu, print_good), + self.connected(function_entry, ret_subprocess_call), + self.connected(ret_subprocess_call, ret_does_this_kill_us_4), + self.connected(ret_does_this_kill_us_4, exit_does_this_kill_us), + self.connected(exit_does_this_kill_us, ret_does_this_kill_us_3), + self.connected(ret_does_this_kill_us_3, 
print_so), + self.connected(print_so, print_good), + self.connected(print_good, _exit), + + ]) def test_final(self): self.cfg_create_from_file('examples/example_inputs/try_final.py') From 890f6bf67d976af18e4241c2c5b3b4bdfcd5b32a Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 19:23:20 -0700 Subject: [PATCH 103/291] Added some stmt_star_handler stuff --- pyt/cfg/README.rst | 126 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 98 insertions(+), 28 deletions(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index d35aaa46..a4607841 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,7 +1,8 @@ -make_cfg is what __main__ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and return a Control Flow Graph. +make_cfg is what __main__ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and returns a Control Flow Graph. -Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) +stmt_visitor.py and expr_visitor.py mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) +This is how ast.NodeVisitor works: .. code-block:: python @@ -12,13 +13,32 @@ Statements can contain expressions, but not the other way around. This is why Ex return visitor(node) -There is a `visit\_` function for almost every AST node type. +So as you'll see, there is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing` and `outgoing` node attributes. -We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing` and `outgoing` node attributes. 
+The two most illustrative functions are stmt_star_handler and expr_star_handler. expr_star_handler has not been merged to master so let's talk about stmt_star_handler. -The two most illustrative functions are stmt_star_handler and expr_star_handler. -Upon visiting an If statement we will enter visit_If, which will call stmt_star_handler, that returns a namedtuple ControlFlowNode with the first statement, last_statements and break_statements. +Handling an if: statement +========================= + +Example code + +.. code-block:: python + + if some_condition: + x = 5 + +This is the relevant part of the `abstract grammar`_ + +.. code-block:: python + + If(expr test, stmt* body, stmt* orelse) + # Note: stmt* means any number of statements. + +Upon visiting an if: statement we will enter visit_If in stmt_visitor.py. We create one node for the test, and connect it with the first node of the body, which in this case is x = 5. + +which will call stmt_star_handler, that returns a namedtuple ConnectStatements with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. + .. code-block:: python @@ -30,19 +50,14 @@ Upon visiting an If statement we will enter visit_If, which will call stmt_star_ )) body_connect_stmts = self.stmt_star_handler(node.body) - if isinstance(body_connect_stmts, IgnoredNode): - body_connect_stmts = ConnectStatements( - first_statement=test, - last_statements=[], - break_statements=[] - ) + # ... test.connect(body_connect_stmts.first_statement) if node.orelse: - orelse_last_nodes = self.handle_or_else(node.orelse, test) - body_connect_stmts.last_statements.extend(orelse_last_nodes) + # ... 
else: - body_connect_stmts.last_statements.append(test) # if there is no orelse, test needs an edge to the next_node + # if there is no orelse, test needs an edge to the next_node + body_connect_stmts.last_statements.append(test) last_statements = remove_breaks(body_connect_stmts.last_statements) @@ -52,26 +67,81 @@ Upon visiting an If statement we will enter visit_If, which will call stmt_star_ break_statements=body_connect_stmts.break_statements ) +Here is the code of stmt_star_handler -In visit_call we will call expr_star_handler on the arguments, that returns a named_tuple with the - -We create the control flow graph of the program we are analyzing. - -These modules mirror the `abstract grammar`_ of Python. - -.. _ast.NodeVisitor: https://docs.python.org/3/library/ast.html#ast.NodeVisitor -.. _abstract grammar: https://docs.python.org/3/library/ast.html#abstract-grammar - +.. code-block:: python -Dive into the raw ast NodeVisitor code. + def stmt_star_handler( + self, + stmts, + prev_node_to_avoid=None + ): + """Handle stmt* expressions in an AST node. + Links all statements together in a list of statements, accounting for statements with multiple last nodes. 
+ """ + break_nodes = list() + cfg_statements = list() + + self.prev_nodes_to_avoid.append(prev_node_to_avoid) + self.last_control_flow_nodes.append(None) + + first_node = None + node_not_to_step_past = self.nodes[-1] + + for stmt in stmts: + node = self.visit(stmt) + + if isinstance(node, ControlFlowNode) and not isinstance(node.test, TryNode): + self.last_control_flow_nodes.append(node.test) + else: + self.last_control_flow_nodes.append(None) + + if isinstance(node, ControlFlowNode): + break_nodes.extend(node.break_statements) + elif isinstance(node, BreakNode): + break_nodes.append(node) + + if not isinstance(node, IgnoredNode): + cfg_statements.append(node) + if not first_node: + if isinstance(node, ControlFlowNode): + first_node = node.test + else: + first_node = get_first_node( + node, + node_not_to_step_past + ) + + self.prev_nodes_to_avoid.pop() + self.last_control_flow_nodes.pop() + + connect_nodes(cfg_statements) + + if cfg_statements: + if first_node: + first_statement = first_node + else: + first_statement = get_first_statement(cfg_statements[0]) + + last_statements = get_last_statements(cfg_statements) + + return ConnectStatements( + first_statement=first_statement, + last_statements=last_statements, + break_statements=break_nodes + ) + else: # When body of module only contains ignored nodes + return IgnoredNode() -Statements can contain expressions, but not the other way around, -so it was natural to have ExprVisitor inherit from StmtVisitor. +Notice how this code can handle an infinite amount of nested if: statements? This is why stmt_star_handler is so instrumental to making the StmtVisitor work. -TODO: stmt_star_handler and expr_star_handler explanations and walk throughs. +.. _ast.NodeVisitor: https://docs.python.org/3/library/ast.html#ast.NodeVisitor +.. _abstract grammar: https://docs.python.org/3/library/ast.html#abstract-grammar +References +========== For more information on AST nodes, see the `Green Tree Snakes`_ documentation. 
From b9a1bae922a833dc7d0807f92bd290ddc57d5841 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 19:28:28 -0700 Subject: [PATCH 104/291] Trim stmt_star_handler code --- pyt/cfg/README.rst | 63 ++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index a4607841..be19c7e3 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,4 +1,4 @@ -make_cfg is what __main__ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and returns a Control Flow Graph. +make_cfg is what __main__.py calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and returns a Control Flow Graph. stmt_visitor.py and expr_visitor.py mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) @@ -73,8 +73,7 @@ Here is the code of stmt_star_handler def stmt_star_handler( self, - stmts, - prev_node_to_avoid=None + stmts ): """Handle stmt* expressions in an AST node. Links all statements together in a list of statements, accounting for statements with multiple last nodes. 
@@ -82,56 +81,42 @@ Here is the code of stmt_star_handler break_nodes = list() cfg_statements = list() - self.prev_nodes_to_avoid.append(prev_node_to_avoid) - self.last_control_flow_nodes.append(None) - first_node = None node_not_to_step_past = self.nodes[-1] for stmt in stmts: node = self.visit(stmt) - if isinstance(node, ControlFlowNode) and not isinstance(node.test, TryNode): - self.last_control_flow_nodes.append(node.test) - else: - self.last_control_flow_nodes.append(None) - if isinstance(node, ControlFlowNode): break_nodes.extend(node.break_statements) elif isinstance(node, BreakNode): break_nodes.append(node) - if not isinstance(node, IgnoredNode): - cfg_statements.append(node) - if not first_node: - if isinstance(node, ControlFlowNode): - first_node = node.test - else: - first_node = get_first_node( - node, - node_not_to_step_past - ) - - self.prev_nodes_to_avoid.pop() - self.last_control_flow_nodes.pop() + cfg_statements.append(node) + if not first_node: + if isinstance(node, ControlFlowNode): + first_node = node.test + else: + first_node = get_first_node( + node, + node_not_to_step_past + ) connect_nodes(cfg_statements) - if cfg_statements: - if first_node: - first_statement = first_node - else: - first_statement = get_first_statement(cfg_statements[0]) - - last_statements = get_last_statements(cfg_statements) - - return ConnectStatements( - first_statement=first_statement, - last_statements=last_statements, - break_statements=break_nodes - ) - else: # When body of module only contains ignored nodes - return IgnoredNode() + if first_node: + first_statement = first_node + else: + first_statement = get_first_statement(cfg_statements[0]) + + last_statements = get_last_statements(cfg_statements) + + return ConnectStatements( + first_statement=first_statement, + last_statements=last_statements, + break_statements=break_nodes + ) + Notice how this code can handle an infinite amount of nested if: statements? 
This is why stmt_star_handler is so instrumental to making the StmtVisitor work. From 13c66ba97bb98d72158c305efc7e1f34372eaed5 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 1 May 2018 19:29:15 -0700 Subject: [PATCH 105/291] Update README.rst --- pyt/cfg/README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index be19c7e3..304243bb 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -76,7 +76,8 @@ Here is the code of stmt_star_handler stmts ): """Handle stmt* expressions in an AST node. - Links all statements together in a list of statements, accounting for statements with multiple last nodes. + Links all statements together in a list of statements. + Accounts for statements with multiple last nodes. """ break_nodes = list() cfg_statements = list() From 5f33fcb7f41898612ad45afabbd1bedaebd304e5 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 1 May 2018 19:44:31 -0700 Subject: [PATCH 106/291] [reorg] make tests pass --- pyt/__main__.py | 242 +----------------- pyt/usage.py | 8 +- pyt/vulnerabilities/vulnerabilities.py | 4 +- tests/usage_test.py | 25 +- tests/vulnerabilities/vulnerabilities_test.py | 6 +- 5 files changed, 34 insertions(+), 251 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 25409d85..b2302113 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -28,197 +28,6 @@ is_function, is_function_without_leading_ ) -<<<<<<< HEAD -======= -from .github_search import scan_github, set_github_api_token -from .lattice import print_lattice -from .liveness import LivenessAnalysis -from .project_handler import get_directory_modules, get_modules -from .reaching_definitions import ReachingDefinitionsAnalysis -from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis -from .repo_runner import get_repos -from .save import ( - cfg_to_file, - create_database, - def_use_chain_to_file, - lattice_to_file, - Output, - use_def_chain_to_file, - verbose_cfg_to_file, - 
vulnerabilities_to_file -) -from .vulnerabilities import find_vulnerabilities - - -def parse_args(args): - parser = argparse.ArgumentParser(prog='python -m pyt') - parser.set_defaults(which='') - - subparsers = parser.add_subparsers() - - entry_group = parser.add_mutually_exclusive_group(required=True) - entry_group.add_argument('-f', '--filepath', - help='Path to the file that should be analysed.', - type=str) - entry_group.add_argument('-gr', '--git-repos', - help='Takes a CSV file of git_url, path per entry.', - type=str) - - parser.add_argument('-pr', '--project-root', - help='Add project root, this is important when the entry' + - ' file is not at the root of the project.', type=str) - parser.add_argument('-d', '--draw-cfg', - help='Draw CFG and output as .pdf file.', - action='http://github.com/store_true') - parser.add_argument('-o', '--output-filename', - help='Output filename.', type=str) - parser.add_argument('-csv', '--csv-path', type=str, - help='Give the path of the csv file' - ' repos should be added to.') - - print_group = parser.add_mutually_exclusive_group() - print_group.add_argument('-p', '--print', - help='Prints the nodes of the CFG.', - action='http://github.com/store_true') - print_group.add_argument('-vp', '--verbose-print', - help='Verbose printing of -p.', action='http://github.com/store_true') - print_group.add_argument('-trim', '--trim-reassigned-in', - help='Trims the reassigned list to the vulnerability chain.', - action='http://github.com/store_true', - default=False) - print_group.add_argument('-i', '--interactive', - help='Will ask you about each vulnerability chain and blackbox nodes.', - action='http://github.com/store_true', - default=False) - - parser.add_argument('-t', '--trigger-word-file', - help='Input trigger word file.', - type=str, - default=default_trigger_word_file) - parser.add_argument('-m', '--blackbox-mapping-file', - help='Input blackbox mapping file.', - type=str, - 
default=default_blackbox_mapping_file) - parser.add_argument('-py2', '--python-2', - help='[WARNING, EXPERIMENTAL] Turns on Python 2 mode,' + - ' needed when target file(s) are written in Python 2.', action='http://github.com/store_true') - parser.add_argument('-l', '--log-level', - help='Choose logging level: CRITICAL, ERROR,' + - ' WARNING(Default), INFO, DEBUG, NOTSET.', type=str) - parser.add_argument('-a', '--adaptor', - help='Choose an adaptor: Flask(Default), Django, Every or Pylons', - type=str) - parser.add_argument('-db', '--create-database', - help='Creates a sql file that can be used to' + - ' create a database.', action='http://github.com/store_true') - parser.add_argument('-dl', '--draw-lattice', - nargs='+', help='Draws a lattice.') - parser.add_argument('-j', '--json', - help='Prints JSON instead of report.', - action='http://github.com/store_true', - default=False) - - analysis_group = parser.add_mutually_exclusive_group() - analysis_group.add_argument('-li', '--liveness', - help='Run liveness analysis. Default is' + - ' reaching definitions tainted version.', - action='http://github.com/store_true') - analysis_group.add_argument('-re', '--reaching', - help='Run reaching definitions analysis.' 
+ - ' Default is reaching definitions' + - ' tainted version.', action='http://github.com/store_true') - analysis_group.add_argument('-rt', '--reaching-taint', - help='This is the default analysis:' + - ' reaching definitions tainted version.', - action='http://github.com/store_true') - - parser.add_argument('-ppm', '--print-project-modules', - help='Print project modules.', action='http://github.com/store_true') - parser.add_argument('-b', '--baseline', - help='path of a baseline report to compare against ' - '(only JSON-formatted files are accepted)', - type=str, - default=False) - parser.add_argument('--ignore-nosec', dest='ignore_nosec', action='http://github.com/store_true', - help='do not skip lines with # nosec comments') - - save_parser = subparsers.add_parser('save', help='Save menu.') - save_parser.set_defaults(which='save') - save_parser.add_argument('-fp', '--filename-prefix', - help='Filename prefix fx file_lattice.pyt', - type=str) - save_parser.add_argument('-du', '--def-use-chain', - help='Output the def-use chain(s) to file.', - action='http://github.com/store_true') - save_parser.add_argument('-ud', '--use-def-chain', - help='Output the use-def chain(s) to file', - action='http://github.com/store_true') - save_parser.add_argument('-cfg', '--control-flow-graph', - help='Output the CFGs to file.', - action='http://github.com/store_true') - save_parser.add_argument('-vcfg', '--verbose-control-flow-graph', - help='Output the verbose CFGs to file.', - action='http://github.com/store_true') - save_parser.add_argument('-an', '--analysis', - help='Output analysis results to file' + - ' in form of a constraint table.', - action='http://github.com/store_true') - save_parser.add_argument('-la', '--lattice', help='Output lattice(s) to file.', - action='http://github.com/store_true') - save_parser.add_argument('-vu', '--vulnerabilities', - help='Output vulnerabilities to 
file.', - action='http://github.com/store_true') - save_parser.add_argument('-all', '--save-all', - help='Output everything to file.', - action='http://github.com/store_true') - - - search_parser = subparsers.add_parser( - 'github_search', - help='Searches through github and runs PyT' - ' on found repositories. This can take some time.') - search_parser.set_defaults(which='search') - - search_parser.add_argument( - '-ss', '--search-string', required=True, - help='String for searching for repos on github.', type=str) - - search_parser.add_argument('-sd', '--start-date', - help='Start date for repo search. ' - 'Criteria used is Created Date.', - type=valid_date, - default=date(2010, 1, 1)) - return parser.parse_args(args) - - -def analyse_repo(args, github_repo, analysis_type, ui_mode, nosec_lines): - cfg_list = list() - directory = os.path.dirname(github_repo.path) - project_modules = get_modules(directory) - local_modules = get_directory_modules(directory) - tree = generate_ast(github_repo.path) - cfg = make_cfg( - tree, - project_modules, - local_modules, - github_repo.path - ) - cfg_list.append(cfg) - - initialize_constraint_table(cfg_list) - analyse(cfg_list, analysis_type=analysis_type) - vulnerabilities = find_vulnerabilities( - cfg_list, - analysis_type, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ), - nosec_lines - ) - return vulnerabilities ->>>>>>> 5b372d267efa8cccc75c998b5d3fe56f2904f116 def main(command_line_args=sys.argv[1:]): @@ -231,48 +40,18 @@ def main(command_line_args=sys.argv[1:]): ui_mode = UImode.TRIM path = os.path.normpath(args.filepath) -<<<<<<< HEAD -======= - cfg_list = list() + if args.ignore_nosec: nosec_lines = set() else: - file = open(path, "r") + file = open(path, 'r') lines = file.readlines() nosec_lines = set( - lineno for - (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) - - if args.git_repos: - repos = get_repos(args.git_repos) 
- for repo in repos: - repo.clone() - vulnerabilities = analyse_repo(args, repo, analysis, ui_mode, nosec_lines) - if args.json: - json.report(vulnerabilities, sys.stdout) - else: - text.report(vulnerabilities, sys.stdout) - if not vulnerabilities: - repo.clean_up() - exit() - - - if args.which == 'search': - set_github_api_token() - scan_github( - args.search_string, - args.start_date, - analysis, - analyse_repo, - args.csv_path, - ui_mode, - args + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' in line or '# nosec' in line ) - exit() - directory = None ->>>>>>> 5b372d267efa8cccc75c998b5d3fe56f2904f116 if args.project_root: directory = os.path.normpath(args.project_root) else: @@ -310,18 +89,11 @@ def main(command_line_args=sys.argv[1:]): vulnerabilities = find_vulnerabilities( cfg_list, ui_mode, -<<<<<<< HEAD args.blackbox_mapping_file, - args.trigger_word_file -======= - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ), + args.trigger_word_file, nosec_lines ->>>>>>> 5b372d267efa8cccc75c998b5d3fe56f2904f116 ) - + if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline( vulnerabilities, diff --git a/pyt/usage.py b/pyt/usage.py index d633cf79..4930eb02 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -85,6 +85,12 @@ def _add_optional_group(parser): type=argparse.FileType('w'), default=sys.stdout, ) + optional_group.add_argument( + '--ignore-nosec', + dest='ignore_nosec', + action='http://github.com/store_true', + help='do not skip lines with # nosec comments' + ) def _add_print_group(parser): @@ -106,8 +112,6 @@ def _add_print_group(parser): def _check_required_and_mutually_exclusive_args(parser, args): if args.filepath is None: parser.error('The -f/--filepath argument is required') - if args.trim_reassigned_in and args.interactive: - parser.error('argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in') def parse_args(args): diff --git 
a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index ef4da52e..b41ae374 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -129,7 +129,7 @@ def append_node_if_reassigned( def find_triggers( nodes, trigger_words, - nosec_lines=set() + nosec_lines ): """Find triggers from the trigger_word_list in the nodes. @@ -468,7 +468,7 @@ def find_vulnerabilities( cfg_list, ui_mode, blackbox_mapping_file, - source_sink_file + source_sink_file, nosec_lines=set() ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. diff --git a/tests/usage_test.py b/tests/usage_test.py index 2b8673c9..cae390e5 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -27,7 +27,8 @@ def test_no_args(self): EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] + [-trim] [-i] required arguments: -f FILEPATH, --filepath FILEPATH @@ -50,6 +51,7 @@ def test_no_args(self): Input file with a list of sources and sinks -o OUTPUT_FILE, --output OUTPUT_FILE write report to filename + --ignore-nosec do not skip lines with # nosec comments print arguments: -trim, --trim-reassigned-in @@ -67,22 +69,23 @@ def test_valid_args_but_no_filepath(self): EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] + [-trim] [-i] python -m pyt: error: The -f/--filepath argument is required\n""" self.assertEqual(stderr.getvalue(), EXPECTED) - def test_using_both_mutually_exclusive_args(self): - with self.assertRaises(SystemExit): - with capture_sys_output() as (_, stderr): - parse_args(['-f', 'foo.py', '-trim', '--interactive']) 
+# def test_using_both_mutually_exclusive_args(self): +# with self.assertRaises(SystemExit): +# with capture_sys_output() as (_, stderr): +# parse_args(['-f', 'foo.py', '-trim', '--interactive']) - EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] -python -m pyt: error: argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in\n""" +# EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] +# [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] +# [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] +# python -m pyt: error: argument -i/--interactive: not allowed with argument -trim/--trim-reassigned-in\n""" - self.assertEqual(stderr.getvalue(), EXPECTED) +# self.assertEqual(stderr.getvalue(), EXPECTED) def test_normal_usage(self): with capture_sys_output() as (stdout, stderr): diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 440f492e..5e40a60f 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -85,7 +85,11 @@ def test_find_triggers(self): XSS1 = cfg_list[1] trigger_words = [('get', [])] - l = vulnerabilities.find_triggers(XSS1.nodes, trigger_words) + l = vulnerabilities.find_triggers( + XSS1.nodes, + trigger_words, + nosec_lines=set() + ) self.assert_length(l, expected_length=1) def test_find_sanitiser_nodes(self): From f7d9ff3c5413fcea771048e0115bd26adc20421c Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 1 May 2018 19:49:54 -0700 Subject: [PATCH 107/291] add ignore-nosec to readme --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 221d6e0c..ee39de0a 100644 --- a/README.rst +++ b/README.rst @@ -80,6 +80,7 @@ Usage Input file with a list of sources and sinks -o OUTPUT_FILE, --output 
OUTPUT_FILE write report to filename + --ignore-nosec do not skip lines with # nosec comments print arguments: -trim, --trim-reassigned-in From a7be197e26604754ba64930a34b18d7bb1113931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 2 May 2018 11:46:35 +0300 Subject: [PATCH 108/291] unnecessary lines removed in self.assertInCfg --- tests/cfg_test.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/cfg_test.py b/tests/cfg_test.py index 2f7ef192..9bd49255 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -288,9 +288,8 @@ def test_orelse_with_no_variables_to_save(self): self.connected(exit_does_this_kill_us, ret_does_this_kill_us_3), self.connected(ret_does_this_kill_us_3, print_so), self.connected(print_so, print_good), - self.connected(print_good, _exit), - - ]) + self.connected(print_good, _exit) + ]) def test_try_orelse_with_no_variables_to_save_and_no_args(self): self.cfg_create_from_file('examples/example_inputs/try_orelse_with_no_variables_to_save_and_no_args.py') @@ -326,8 +325,7 @@ def test_try_orelse_with_no_variables_to_save_and_no_args(self): self.connected(exit_does_this_kill_us, ret_does_this_kill_us_3), self.connected(ret_does_this_kill_us_3, print_so), self.connected(print_so, print_good), - self.connected(print_good, _exit), - + self.connected(print_good, _exit) ]) def test_final(self): From 9b8e3d7c9d13f00b7f83323cf52ded73a473f0f5 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:14:51 -0700 Subject: [PATCH 109/291] Add image to pyt/ readme, add link to pyt/ readme from root readme, finish cfg readme for now --- README.rst | 7 +++++++ pyt/README.rst | 3 +++ pyt/cfg/README.rst | 11 +++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index ee39de0a..db4ba9e5 100644 --- a/README.rst +++ b/README.rst @@ -88,3 +88,10 @@ Usage chain. -i, --interactive Will ask you about each blackbox function call in vulnerability chains. 
+ +How It Works +============ + +You will find a README.rst in every directory in the pyt folder, `start here`_. + +.. _start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt diff --git a/pyt/README.rst b/pyt/README.rst index 611002a5..8cc1e5ea 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -59,3 +59,6 @@ Step 8 .. _text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters .. _output file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L80 + + +.. image:: https://github.com/KevinHock/rtdpyt/blob/master/docs/img/overview.png diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 304243bb..9f007bbc 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -35,9 +35,12 @@ This is the relevant part of the `abstract grammar`_ If(expr test, stmt* body, stmt* orelse) # Note: stmt* means any number of statements. -Upon visiting an if: statement we will enter visit_If in stmt_visitor.py. We create one node for the test, and connect it with the first node of the body, which in this case is x = 5. -which will call stmt_star_handler, that returns a namedtuple ConnectStatements with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. +Upon visiting an if: statement we will enter visit_If in stmt_visitor.py. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the stmt_star_handler function. + +stmt_star_handler returns a namedtuple (ConnectStatements) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. stmt_star_handler takes care of connecting each statement in the body to the next one. 
+ +We then connect the test node to the first node in the body (if some_condition -> x = 5) and return a namedtuple (ControlFlowNode) with the test, last_statements and break_statements. .. code-block:: python @@ -119,10 +122,6 @@ Here is the code of stmt_star_handler ) - -Notice how this code can handle an infinite amount of nested if: statements? This is why stmt_star_handler is so instrumental to making the StmtVisitor work. - - .. _ast.NodeVisitor: https://docs.python.org/3/library/ast.html#ast.NodeVisitor .. _abstract grammar: https://docs.python.org/3/library/ast.html#abstract-grammar From 5781228767f2742b4a4fc738b5e8dbd4513f0fc9 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:25:57 -0700 Subject: [PATCH 110/291] update readme's --- pyt/README.rst | 6 ++++++ pyt/cfg/README.rst | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyt/README.rst b/pyt/README.rst index 8cc1e5ea..64e4439a 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -1,3 +1,6 @@ +How It Works +============ + `__main__.py`_ is where all the high-level steps happen. .. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py @@ -60,5 +63,8 @@ Step 8 .. _text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters .. _output file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L80 +Here is an image from the `orginal thesis`_: .. image:: https://github.com/KevinHock/rtdpyt/blob/master/docs/img/overview.png + +.. _original thesis: http://projekter.aau.dk/projekter/files/239563289/final.pdf#page=57 diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 9f007bbc..d5199bfb 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -70,7 +70,6 @@ We then connect the test node to the first node in the body (if some_condition - break_statements=body_connect_stmts.break_statements ) -Here is the code of stmt_star_handler .. 
code-block:: python From c7199abe76f6e4190a41d12563f5785f400c23cd Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:27:33 -0700 Subject: [PATCH 111/291] type --- pyt/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/README.rst b/pyt/README.rst index 64e4439a..d1474108 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -63,7 +63,7 @@ Step 8 .. _text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters .. _output file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L80 -Here is an image from the `orginal thesis`_: +Here is an image from the `original thesis`_: .. image:: https://github.com/KevinHock/rtdpyt/blob/master/docs/img/overview.png From 935a04d522c37b4344e61b169d7fc8d1bf34aff9 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:28:47 -0700 Subject: [PATCH 112/291] change page of thesis in link --- pyt/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/README.rst b/pyt/README.rst index d1474108..5dc8255e 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -67,4 +67,4 @@ Here is an image from the `original thesis`_: .. image:: https://github.com/KevinHock/rtdpyt/blob/master/docs/img/overview.png -.. _original thesis: http://projekter.aau.dk/projekter/files/239563289/final.pdf#page=57 +.. _original thesis: http://projekter.aau.dk/projekter/files/239563289/final.pdf#page=62 From 1cb06883a41317f654d9d670711be7c218ccc642 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:42:02 -0700 Subject: [PATCH 113/291] update readme's, more links mostly --- pyt/analysis/README.rst | 25 +++++++++++++++++++++++-- pyt/cfg/README.rst | 17 ++++++++++++----- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst index d785ffdb..e1ba9ef1 100644 --- a/pyt/analysis/README.rst +++ b/pyt/analysis/README.rst @@ -38,12 +38,33 @@ How does a definition reach? 
============================ After we know that a definition reaches a use that we are interested in, -we make what are called `definition-use chains`_ figure out how the definition +we make what use called `definition-use chains`_ to figure out how the definition reaches the use. This is necessary because there may be more than one path from -the definition to the use. +the definition to the use. Here is the code from `definition_chains.py`_: +.. code-block:: python + + def build_def_use_chain( + cfg_nodes, + lattice + ): + def_use = defaultdict(list) + # For every node + for node in cfg_nodes: + # That's a definition + if isinstance(node, AssignmentNode): + # Get the uses + for variable in node.right_hand_side_variables: + # Loop through most of the nodes before it + for earlier_node in get_constraint_nodes(node, lattice): + # and add them to the 'uses list' of each earlier node, when applicable + # 'earlier node' here being a simplification + if variable in earlier_node.left_hand_side: + def_use[earlier_node].append(node) + return def_use .. _definition-use chains: https://en.wikipedia.org/wiki/Use-define_chain +.. _definition_chains.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/analysis/definition_chains.py#L16-L33 Additional details diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index d5199bfb..595014ae 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,4 +1,7 @@ -make_cfg is what __main__.py calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and returns a Control Flow Graph. +`make_cfg`_ is what `__main__.py`_ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and returns a Control Flow Graph. + +.. _make_cfg: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/make_cfg.py#L22-L38 +.. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py#L33-L106 stmt_visitor.py and expr_visitor.py mirror the `abstract grammar`_ of Python. 
Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) @@ -13,9 +16,11 @@ This is how ast.NodeVisitor works: return visitor(node) -So as you'll see, there is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing` and `outgoing` node attributes. +So as you'll see, there is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `\`ingoing\` and \`outgoing\` node attributes`_. + +.. _\`ingoing\` and \`outgoing\` node attributes_: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L27-L48 -The two most illustrative functions are stmt_star_handler and expr_star_handler. expr_star_handler has not been merged to master so let's talk about stmt_star_handler. +The two most illustrative functions are `stmt_star_handler`_ and expr_star_handler. expr_star_handler has not been merged to master so let's talk about `stmt_star_handler`_. Handling an if: statement @@ -36,12 +41,14 @@ This is the relevant part of the `abstract grammar`_ # Note: stmt* means any number of statements. -Upon visiting an if: statement we will enter visit_If in stmt_visitor.py. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the stmt_star_handler function. +Upon visiting an if: statement we will enter visit_If in stmt_visitor.py. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the `stmt_star_handler`_ function. 
-stmt_star_handler returns a namedtuple (ConnectStatements) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. stmt_star_handler takes care of connecting each statement in the body to the next one. +`stmt_star_handler`_ returns a namedtuple (ConnectStatements) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. `stmt_star_handler`_ takes care of connecting each statement in the body to the next one. We then connect the test node to the first node in the body (if some_condition -> x = 5) and return a namedtuple (ControlFlowNode) with the test, last_statements and break_statements. +.. _stmt_star_handler: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L60-L121 + .. code-block:: python From 96440607f02539365e7c785a150fbbf043ec0908 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:43:47 -0700 Subject: [PATCH 114/291] update broken link due to rst links being hard --- pyt/cfg/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 595014ae..7e5b1077 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -16,9 +16,9 @@ This is how ast.NodeVisitor works: return visitor(node) -So as you'll see, there is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `\`ingoing\` and \`outgoing\` node attributes`_. +So as you'll see, there is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing and outgoing node attributes`_. -.. _\`ingoing\` and \`outgoing\` node attributes_: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L27-L48 +.. 
_ingoing and outgoing node attributes_: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L27-L48 The two most illustrative functions are `stmt_star_handler`_ and expr_star_handler. expr_star_handler has not been merged to master so let's talk about `stmt_star_handler`_. From b2ebd64a0a4d1ced24676046d0b0624b4f7cc79a Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:44:26 -0700 Subject: [PATCH 115/291] update broken link due to rst links being hard --- pyt/cfg/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 7e5b1077..a6c82297 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -18,7 +18,7 @@ This is how ast.NodeVisitor works: So as you'll see, there is a `visit\_` function for almost every AST node type. We keep track of all the nodes while we visit by adding them to self.nodes, connecting them via `ingoing and outgoing node attributes`_. -.. _ingoing and outgoing node attributes_: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L27-L48 +.. _ingoing and outgoing node attributes: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L27-L48 The two most illustrative functions are `stmt_star_handler`_ and expr_star_handler. expr_star_handler has not been merged to master so let's talk about `stmt_star_handler`_. 
From 364a276bd669a6778abebb4a500957c8b821a085 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:49:20 -0700 Subject: [PATCH 116/291] Glitch Boy (Dyto Remix) https://soundcloud.com/qmando/likes --- pyt/cfg/README.rst | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index a6c82297..b2517bdf 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,9 +1,12 @@ -`make_cfg`_ is what `__main__.py`_ calls, it takes the Abstract Syntax Tree, creates an ExprVisitor and returns a Control Flow Graph. +`make_cfg`_ is what `__main__.py`_ calls, it takes the Abstract Syntax Tree, creates an `ExprVisitor`_ and returns a Control Flow Graph. .. _make_cfg: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/make_cfg.py#L22-L38 .. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py#L33-L106 -stmt_visitor.py and expr_visitor.py mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why ExprVisitor inherits from StmtVisitor, (which inherits from `ast.NodeVisitor`_ from the standard library.) +`stmt_visitor.py`_ and `expr_visitor.py`_ mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why `ExprVisitor`_ inherits from `StmtVisitor`_, (which inherits from `ast.NodeVisitor`_ from the standard library.) + +.. _StmtVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L55 +.. _ExprVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/expr_visitor.py#L33 This is how ast.NodeVisitor works: @@ -41,12 +44,16 @@ This is the relevant part of the `abstract grammar`_ # Note: stmt* means any number of statements. -Upon visiting an if: statement we will enter visit_If in stmt_visitor.py. 
Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the `stmt_star_handler`_ function. +Upon visiting an if: statement we will enter visit_If in `stmt_visitor.py`_. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the `stmt_star_handler`_ function. `stmt_star_handler`_ returns a namedtuple (ConnectStatements) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. `stmt_star_handler`_ takes care of connecting each statement in the body to the next one. We then connect the test node to the first node in the body (if some_condition -> x = 5) and return a namedtuple (ControlFlowNode) with the test, last_statements and break_statements. +.. _stmt\_visitor.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py + +.. _expr\_visitor.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/expr_visitor.py + .. _stmt_star_handler: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L60-L121 From fbd151cb6088ee376b614755ffb3276afe59361e Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:54:00 -0700 Subject: [PATCH 117/291] More links, b/c backticks in markdown are better than rst --- pyt/cfg/README.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index b2517bdf..9cabb67b 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -8,7 +8,7 @@ .. _StmtVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L55 .. _ExprVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/expr_visitor.py#L33 -This is how ast.NodeVisitor works: +This is how `ast.NodeVisitor`_ works: .. 
code-block:: python @@ -46,9 +46,14 @@ This is the relevant part of the `abstract grammar`_ Upon visiting an if: statement we will enter visit_If in `stmt_visitor.py`_. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the `stmt_star_handler`_ function. -`stmt_star_handler`_ returns a namedtuple (ConnectStatements) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. `stmt_star_handler`_ takes care of connecting each statement in the body to the next one. +`stmt_star_handler`_ returns a namedtuple (`ConnectStatements`_) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. `stmt_star_handler`_ takes care of connecting each statement in the body to the next one. -We then connect the test node to the first node in the body (if some_condition -> x = 5) and return a namedtuple (ControlFlowNode) with the test, last_statements and break_statements. +We then connect the test node to the first node in the body (if some_condition -> x = 5) and return a namedtuple (`ControlFlowNode`_) with the test, last_statements and break_statements. + + +.. _ConnectStatements: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor_helper.py#L15 + +.. _ControlFlowNode: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L7 .. 
_stmt\_visitor.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py From 56b9c88bf33b6596c505527ad8f845bd4accfadc Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:56:21 -0700 Subject: [PATCH 118/291] More links, b/c backticks in markdown are better than rst --- pyt/cfg/README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 9cabb67b..5d6ab0c4 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -1,7 +1,9 @@ -`make_cfg`_ is what `__main__.py`_ calls, it takes the Abstract Syntax Tree, creates an `ExprVisitor`_ and returns a Control Flow Graph. +`make_cfg`_ is what `__main__.py`_ calls, it takes the `Abstract Syntax Tree`_, creates an `ExprVisitor`_ and returns a `Control Flow Graph`_. .. _make_cfg: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/make_cfg.py#L22-L38 .. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py#L33-L106 +.. _Abstract Syntax Tree: https://en.wikipedia.org/wiki/Abstract_syntax_tree +.. _Control Flow Graph: https://en.wikipedia.org/wiki/Control_flow_graph `stmt_visitor.py`_ and `expr_visitor.py`_ mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why `ExprVisitor`_ inherits from `StmtVisitor`_, (which inherits from `ast.NodeVisitor`_ from the standard library.) From 95ecff439115a575b83ac0b0171605f3ab6e5d16 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:57:51 -0700 Subject: [PATCH 119/291] semicolon instead of parenthesis --- pyt/cfg/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 5d6ab0c4..586a8875 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -5,7 +5,7 @@ .. _Abstract Syntax Tree: https://en.wikipedia.org/wiki/Abstract_syntax_tree .. 
_Control Flow Graph: https://en.wikipedia.org/wiki/Control_flow_graph -`stmt_visitor.py`_ and `expr_visitor.py`_ mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why `ExprVisitor`_ inherits from `StmtVisitor`_, (which inherits from `ast.NodeVisitor`_ from the standard library.) +`stmt_visitor.py`_ and `expr_visitor.py`_ mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why `ExprVisitor`_ inherits from `StmtVisitor`_, which inherits from `ast.NodeVisitor`_; from the standard library.) .. _StmtVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L55 .. _ExprVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/expr_visitor.py#L33 From 8e197ade8988548e244556a4c64f03bb031d36c3 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 10:58:26 -0700 Subject: [PATCH 120/291] del closing parenthesis --- pyt/cfg/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 586a8875..0e620b85 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -5,7 +5,7 @@ .. _Abstract Syntax Tree: https://en.wikipedia.org/wiki/Abstract_syntax_tree .. _Control Flow Graph: https://en.wikipedia.org/wiki/Control_flow_graph -`stmt_visitor.py`_ and `expr_visitor.py`_ mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why `ExprVisitor`_ inherits from `StmtVisitor`_, which inherits from `ast.NodeVisitor`_; from the standard library.) +`stmt_visitor.py`_ and `expr_visitor.py`_ mirror the `abstract grammar`_ of Python. Statements can contain expressions, but not the other way around. This is why `ExprVisitor`_ inherits from `StmtVisitor`_, which inherits from `ast.NodeVisitor`_; from the standard library. .. 
_StmtVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L55 .. _ExprVisitor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/expr_visitor.py#L33 From 1d0f4445b8653ee82d4f0f821c6dcfb710e77978 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 11:01:04 -0700 Subject: [PATCH 121/291] del closing parenthesis --- pyt/cfg/README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyt/cfg/README.rst b/pyt/cfg/README.rst index 0e620b85..4f561c77 100644 --- a/pyt/cfg/README.rst +++ b/pyt/cfg/README.rst @@ -46,13 +46,15 @@ This is the relevant part of the `abstract grammar`_ # Note: stmt* means any number of statements. -Upon visiting an if: statement we will enter visit_If in `stmt_visitor.py`_. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the `stmt_star_handler`_ function. +Upon visiting an if: statement we will enter `visit_If`_ in `stmt_visitor.py`_. Since we know that the test is just one expression, we can just call self.visit() on it. The body could be an infinite number of statements, so we use the `stmt_star_handler`_ function. `stmt_star_handler`_ returns a namedtuple (`ConnectStatements`_) with the first statement, last_statements and break_statements of all of the statements that were in the body of the node. `stmt_star_handler`_ takes care of connecting each statement in the body to the next one. We then connect the test node to the first node in the body (if some_condition -> x = 5) and return a namedtuple (`ControlFlowNode`_) with the test, last_statements and break_statements. +.. _visit\_If: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor.py#L208-L232 + .. _ConnectStatements: https://github.com/python-security/pyt/blob/re_organize_code/pyt/cfg/stmt_visitor_helper.py#L15 .. 
_ControlFlowNode: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L7 From af3de1fbd51b198a203fb4d6afac3806bac437b5 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 5 May 2018 11:04:54 -0700 Subject: [PATCH 122/291] Add ast link to pyt/ readme --- pyt/README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyt/README.rst b/pyt/README.rst index 5dc8255e..7d69b9ce 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -15,7 +15,7 @@ Step 1 Step 2 - Generate the Abstract Syntax Tree (AST). + Generate the `Abstract Syntax Tree (AST)`_. Essentially done in these lines of code with the `ast`_ module: @@ -26,6 +26,7 @@ Step 2 `generate_ast`_ in `ast_helper.py`_ + .. _Abstract Syntax Tree (AST): https://en.wikipedia.org/wiki/Abstract_syntax_tree .. _ast: https://docs.python.org/3/library/ast.html .. _generate_ast: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/ast_helper.py#L24 .. _ast_helper.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/ast_helper.py From e93ad98463aa5ee7ed1e3fe1b3c13f81a5ef5835 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Mon, 7 May 2018 20:58:08 -0700 Subject: [PATCH 123/291] Update README.rst --- README.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.rst b/README.rst index 3883e331..2488f42d 100644 --- a/README.rst +++ b/README.rst @@ -126,3 +126,10 @@ Install dependencies smmap (0.9.0) In the future, just type ``source ~/a_folder/bin/activate`` to start developing. + +How It Works +============ + +Soon you will find a README.rst in every directory in the pyt folder, `start here`_. + +.. 
_start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt From 2ebe5951a7ed940cb38b16035ce55ba2c0ab7bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 10 May 2018 16:43:15 +0300 Subject: [PATCH 124/291] new args --- pyt/__main__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index b81e6f03..90b8ad7d 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -93,7 +93,9 @@ def parse_args(args): help='Will ask you about each vulnerability chain and blackbox nodes.', action='http://github.com/store_true', default=False) - + parser.add_argument('-r', '--recursive', dest='recursive', + action='http://github.com/store_true', help='find and process files in subdirectories') + parser.add_argument('-t', '--trigger-word-file', help='Input trigger word file.', type=str, @@ -238,6 +240,9 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM + recursivePath = os.path.normpath(args.recursive) + print(recursivePath) + path = os.path.normpath(args.filepath) cfg_list = list() if args.ignore_nosec: From ef3a21d4b9b8b67bfa48869f09bb09b72f3b12c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 11 May 2018 22:22:34 +0300 Subject: [PATCH 125/291] added recursive args --- pyt/__main__.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 90b8ad7d..903fd158 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -59,6 +59,9 @@ def parse_args(args): subparsers = parser.add_subparsers() + entry_group.add_argument('-r', '--recursive', + help='find and process files in subdirectories', + type=str) entry_group = parser.add_mutually_exclusive_group(required=True) entry_group.add_argument('-f', '--filepath', help='Path to the file that should be analysed.', @@ -240,10 +243,17 @@ def main(command_line_args=sys.argv[1:]): elif 
args.trim_reassigned_in: ui_mode = UImode.TRIM - recursivePath = os.path.normpath(args.recursive) - print(recursivePath) - - path = os.path.normpath(args.filepath) + if args.recursive: + file_list = [] + for root, dirs, files in os.walk(args.recursive): + for f in files: + fullpath = os.path.join(root, f) + if os.path.splitext(fullpath)[1] == '.py': + file_list.append(fullpath) + print(file_list) + + if args.filepath: + path = os.path.normpath(args.filepath) cfg_list = list() if args.ignore_nosec: nosec_lines = set() From 38be6e2855861e7e6f5d7934af0dc52ff24f1059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 11 May 2018 22:33:18 +0300 Subject: [PATCH 126/291] Update __main__.py --- pyt/__main__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 903fd158..8655080a 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -250,11 +250,8 @@ def main(command_line_args=sys.argv[1:]): fullpath = os.path.join(root, f) if os.path.splitext(fullpath)[1] == '.py': file_list.append(fullpath) - print(file_list) - - if args.filepath: - path = os.path.normpath(args.filepath) - cfg_list = list() + path = fullpath + cfg_list = list() if args.ignore_nosec: nosec_lines = set() else: @@ -336,6 +333,10 @@ def main(command_line_args=sys.argv[1:]): nosec_lines ) + if args.filepath: + path = os.path.normpath(args.filepath) + + if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline(vulnerabilities, args.baseline) From 759f632f823005b9ea6c21ff629b1197e73c8745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 11 May 2018 22:35:25 +0300 Subject: [PATCH 127/291] Update __main__.py --- pyt/__main__.py | 160 ++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 8655080a..1cd4f204 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -251,87 +251,87 @@ def 
main(command_line_args=sys.argv[1:]): if os.path.splitext(fullpath)[1] == '.py': file_list.append(fullpath) path = fullpath - cfg_list = list() - if args.ignore_nosec: - nosec_lines = set() - else: - file = open(path, "r") - lines = file.readlines() - nosec_lines = set( - lineno for - (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) - - if args.git_repos: - repos = get_repos(args.git_repos) - for repo in repos: - repo.clone() - vulnerabilities = analyse_repo(args, repo, analysis, ui_mode, nosec_lines) - if args.json: - json.report(vulnerabilities, sys.stdout) - else: - text.report(vulnerabilities, sys.stdout) - if not vulnerabilities: - repo.clean_up() - exit() - - - if args.which == 'search': - set_github_api_token() - scan_github( - args.search_string, - args.start_date, - analysis, - analyse_repo, - args.csv_path, - ui_mode, - args - ) - exit() - - directory = None - if args.project_root: - directory = os.path.normpath(args.project_root) - else: - directory = os.path.dirname(path) - project_modules = get_modules(directory) - local_modules = get_directory_modules(directory) - - tree = generate_ast(path, python_2=args.python_2) - - cfg_list = list() - cfg = make_cfg( - tree, - project_modules, - local_modules, - path - ) - cfg_list.append(cfg) - framework_route_criteria = is_flask_route_function - if args.adaptor: - if args.adaptor.lower().startswith('e'): - framework_route_criteria = is_function - elif args.adaptor.lower().startswith('p'): - framework_route_criteria = is_function_without_leading_ - elif args.adaptor.lower().startswith('d'): - framework_route_criteria = is_django_view_function - # Add all the route functions to the cfg_list - FrameworkAdaptor(cfg_list, project_modules, local_modules, framework_route_criteria) + + if args.ignore_nosec: + nosec_lines = set() + else: + file = open(path, "r") + lines = file.readlines() + nosec_lines = set( + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' 
in line or '# nosec' in line) + + if args.git_repos: + repos = get_repos(args.git_repos) + for repo in repos: + repo.clone() + vulnerabilities = analyse_repo(args, repo, analysis, ui_mode, nosec_lines) + if args.json: + json.report(vulnerabilities, sys.stdout) + else: + text.report(vulnerabilities, sys.stdout) + if not vulnerabilities: + repo.clean_up() + exit() + + + if args.which == 'search': + set_github_api_token() + scan_github( + args.search_string, + args.start_date, + analysis, + analyse_repo, + args.csv_path, + ui_mode, + args + ) + exit() + + directory = None + if args.project_root: + directory = os.path.normpath(args.project_root) + else: + directory = os.path.dirname(path) + project_modules = get_modules(directory) + local_modules = get_directory_modules(directory) + + tree = generate_ast(path, python_2=args.python_2) - initialize_constraint_table(cfg_list) - - analyse(cfg_list, analysis_type=analysis) - - vulnerabilities = find_vulnerabilities( - cfg_list, - analysis, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ), - nosec_lines - ) + cfg_list = list() + cfg = make_cfg( + tree, + project_modules, + local_modules, + path + ) + cfg_list.append(cfg) + framework_route_criteria = is_flask_route_function + if args.adaptor: + if args.adaptor.lower().startswith('e'): + framework_route_criteria = is_function + elif args.adaptor.lower().startswith('p'): + framework_route_criteria = is_function_without_leading_ + elif args.adaptor.lower().startswith('d'): + framework_route_criteria = is_django_view_function + # Add all the route functions to the cfg_list + FrameworkAdaptor(cfg_list, project_modules, local_modules, framework_route_criteria) + + initialize_constraint_table(cfg_list) + + analyse(cfg_list, analysis_type=analysis) + + vulnerabilities = find_vulnerabilities( + cfg_list, + analysis, + ui_mode, + VulnerabilityFiles( + args.blackbox_mapping_file, + args.trigger_word_file + ), + nosec_lines + ) if args.filepath: 
path = os.path.normpath(args.filepath) From ed38dbb136a8842b83bfa57cb92545d33804f659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 13 May 2018 00:21:23 +0300 Subject: [PATCH 128/291] Created discover_files() function --- pyt/__main__.py | 204 +++++++++++++++++++++++++----------------------- 1 file changed, 106 insertions(+), 98 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 1cd4f204..aa17a887 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -59,10 +59,10 @@ def parse_args(args): subparsers = parser.add_subparsers() - entry_group.add_argument('-r', '--recursive', - help='find and process files in subdirectories', - type=str) entry_group = parser.add_mutually_exclusive_group(required=True) + entry_group.add_argument('-r', '--recursive', + help='Output filename.', + type=str) entry_group.add_argument('-f', '--filepath', help='Path to the file that should be analysed.', type=str) @@ -96,9 +96,7 @@ def parse_args(args): help='Will ask you about each vulnerability chain and blackbox nodes.', action='http://github.com/store_true', default=False) - parser.add_argument('-r', '--recursive', dest='recursive', - action='http://github.com/store_true', help='find and process files in subdirectories') - + parser.add_argument('-t', '--trigger-word-file', help='Input trigger word file.', type=str, @@ -125,6 +123,9 @@ def parse_args(args): help='Prints JSON instead of report.', action='http://github.com/store_true', default=False) + parser.add_argument('-x', '--exclude', dest='excluded_paths', + action='http://github.com/store', + default='', help='Separate files with commas') analysis_group = parser.add_mutually_exclusive_group() analysis_group.add_argument('-li', '--liveness', @@ -227,6 +228,18 @@ def analyse_repo(args, github_repo, analysis_type, ui_mode, nosec_lines): ) return vulnerabilities +def discover_files(directory_path, excluded_files): + file_list = [] + excluded_list = 
excluded_files.split(",") + + for root, dirs, files in os.walk(directory_path): + for f in files: + fullpath = os.path.join(root, f) + if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: + file_list.append(fullpath) + + return(file_list) + def main(command_line_args=sys.argv[1:]): args = parse_args(command_line_args) @@ -243,99 +256,94 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM - if args.recursive: - file_list = [] - for root, dirs, files in os.walk(args.recursive): - for f in files: - fullpath = os.path.join(root, f) - if os.path.splitext(fullpath)[1] == '.py': - file_list.append(fullpath) - path = fullpath - - if args.ignore_nosec: - nosec_lines = set() - else: - file = open(path, "r") - lines = file.readlines() - nosec_lines = set( - lineno for - (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line) - - if args.git_repos: - repos = get_repos(args.git_repos) - for repo in repos: - repo.clone() - vulnerabilities = analyse_repo(args, repo, analysis, ui_mode, nosec_lines) - if args.json: - json.report(vulnerabilities, sys.stdout) - else: - text.report(vulnerabilities, sys.stdout) - if not vulnerabilities: - repo.clean_up() - exit() - - - if args.which == 'search': - set_github_api_token() - scan_github( - args.search_string, - args.start_date, - analysis, - analyse_repo, - args.csv_path, - ui_mode, - args - ) - exit() - - directory = None - if args.project_root: - directory = os.path.normpath(args.project_root) - else: - directory = os.path.dirname(path) - project_modules = get_modules(directory) - local_modules = get_directory_modules(directory) - - tree = generate_ast(path, python_2=args.python_2) - - cfg_list = list() - cfg = make_cfg( - tree, - project_modules, - local_modules, - path - ) - cfg_list.append(cfg) - framework_route_criteria = is_flask_route_function - if args.adaptor: - if args.adaptor.lower().startswith('e'): - 
framework_route_criteria = is_function - elif args.adaptor.lower().startswith('p'): - framework_route_criteria = is_function_without_leading_ - elif args.adaptor.lower().startswith('d'): - framework_route_criteria = is_django_view_function - # Add all the route functions to the cfg_list - FrameworkAdaptor(cfg_list, project_modules, local_modules, framework_route_criteria) - - initialize_constraint_table(cfg_list) - - analyse(cfg_list, analysis_type=analysis) - - vulnerabilities = find_vulnerabilities( - cfg_list, - analysis, - ui_mode, - VulnerabilityFiles( - args.blackbox_mapping_file, - args.trigger_word_file - ), - nosec_lines - ) - - if args.filepath: - path = os.path.normpath(args.filepath) + directory_path = os.path.normpath(args.recursive) + excluded_files = args.excluded_paths + test = discover_files(directory_path, excluded_files) + + print(test) + + path = os.path.normpath(args.filepath) + cfg_list = list() + if args.ignore_nosec: + nosec_lines = set() + else: + file = open(path, "r") + lines = file.readlines() + nosec_lines = set( + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' in line or '# nosec' in line) + + if args.git_repos: + repos = get_repos(args.git_repos) + for repo in repos: + repo.clone() + vulnerabilities = analyse_repo(args, repo, analysis, ui_mode, nosec_lines) + if args.json: + json.report(vulnerabilities, sys.stdout) + else: + text.report(vulnerabilities, sys.stdout) + if not vulnerabilities: + repo.clean_up() + exit() + + + if args.which == 'search': + set_github_api_token() + scan_github( + args.search_string, + args.start_date, + analysis, + analyse_repo, + args.csv_path, + ui_mode, + args + ) + exit() + + directory = None + if args.project_root: + directory = os.path.normpath(args.project_root) + else: + directory = os.path.dirname(path) + project_modules = get_modules(directory) + local_modules = get_directory_modules(directory) + tree = generate_ast(path, python_2=args.python_2) + + cfg_list = list() + cfg 
= make_cfg( + tree, + project_modules, + local_modules, + path + ) + cfg_list.append(cfg) + framework_route_criteria = is_flask_route_function + if args.adaptor: + if args.adaptor.lower().startswith('e'): + framework_route_criteria = is_function + elif args.adaptor.lower().startswith('p'): + framework_route_criteria = is_function_without_leading_ + elif args.adaptor.lower().startswith('d'): + framework_route_criteria = is_django_view_function + # Add all the route functions to the cfg_list + FrameworkAdaptor(cfg_list, project_modules, local_modules, framework_route_criteria) + + initialize_constraint_table(cfg_list) + + analyse(cfg_list, analysis_type=analysis) + + vulnerabilities = find_vulnerabilities( + cfg_list, + analysis, + ui_mode, + VulnerabilityFiles( + args.blackbox_mapping_file, + args.trigger_word_file + ), + nosec_lines + ) if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline(vulnerabilities, args.baseline) From e2c25636eab4fe330c2be7b22df1f10006ebc598 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 19 May 2018 16:34:42 -0700 Subject: [PATCH 129/291] Update README.rst --- README.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 2488f42d..073d38fc 100644 --- a/README.rst +++ b/README.rst @@ -73,6 +73,12 @@ Running an individual test file ``python3 -m unittest tests.import_test`` Running an individual test ``python3 -m unittest tests.import_test.ImportTest.test_import`` +How It Works +============ + +Soon you will find a README.rst in every directory in the pyt folder, `start here`_. + +.. _start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt Contributions ============= @@ -126,10 +132,3 @@ Install dependencies smmap (0.9.0) In the future, just type ``source ~/a_folder/bin/activate`` to start developing. - -How It Works -============ - -Soon you will find a README.rst in every directory in the pyt folder, `start here`_. - -.. 
_start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt From b5f7e50410900c12c1e8334f183259015df16cf5 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 5 Jun 2018 09:46:58 -0700 Subject: [PATCH 130/291] Edit vulns/README.rst --- pyt/vulnerabilities/README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst index 3ba5b13c..180020d6 100644 --- a/pyt/vulnerabilities/README.rst +++ b/pyt/vulnerabilities/README.rst @@ -1 +1,14 @@ Coming soon. + +There are a few different kinds of vulnerabilities + +Regular +Sanitised +Unknown + + +How we find secondary nodes + +How we find sources/sinks + +How def-use chains are used From 3ac883c71eb46387804a31726c643843604bdc72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 6 Jun 2018 14:55:01 +0300 Subject: [PATCH 131/291] added recursive option --- pyt/usage.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pyt/usage.py b/pyt/usage.py index 4930eb02..e768d38a 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -91,7 +91,18 @@ def _add_optional_group(parser): action='http://github.com/store_true', help='do not skip lines with # nosec comments' ) - + optional_group.add_argument( + '-r', '--recursive', + help='Output filename.', + type=str + ) + optional_group.add_argument( + '-x', '--exclude', + dest='excluded_paths', + action='http://github.com/store', + default='', + help='Separate files with commas' + ) def _add_print_group(parser): print_group = parser.add_argument_group('print arguments') From e246104334f9a824557b989859554126d1de041f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 6 Jun 2018 14:56:11 +0300 Subject: [PATCH 132/291] discover_files --- pyt/__main__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pyt/__main__.py b/pyt/__main__.py index b2302113..ce9e0355 100644 --- a/pyt/__main__.py +++ 
b/pyt/__main__.py @@ -30,6 +30,19 @@ ) +def discover_files(directory_path, excluded_files): + file_list = [] + excluded_list = excluded_files.split(",") + + for root, dirs, files in os.walk(directory_path): + for f in files: + fullpath = os.path.join(root, f) + if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: + file_list.append(fullpath) + + return(file_list) + + def main(command_line_args=sys.argv[1:]): args = parse_args(command_line_args) @@ -40,6 +53,10 @@ def main(command_line_args=sys.argv[1:]): ui_mode = UImode.TRIM path = os.path.normpath(args.filepath) + directory_path = os.path.normpath(args.recursive) + excluded_files = args.excluded_paths + test = discover_files(directory_path, excluded_files) #just for see files in directory + print(test) if args.ignore_nosec: nosec_lines = set() From 8d43c95f75b18eb7dbcc4ce96ed9ce6e56535c96 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Wed, 6 Jun 2018 18:23:21 -0700 Subject: [PATCH 133/291] Update README.rst --- README.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index bfb55e49..3593d1e2 100644 --- a/README.rst +++ b/README.rst @@ -50,6 +50,13 @@ PyT can also be installed from source. To do so, clone the repo, and then run: python3 setup.py install +How It Works +============ + +Soon you will find a README.rst in every directory in the pyt folder, `start here`_. + +.. _start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt + Usage ===== @@ -89,13 +96,6 @@ Usage -i, --interactive Will ask you about each blackbox function call in vulnerability chains. -How It Works -============ - -Soon you will find a README.rst in every directory in the pyt folder, `start here`_. - -.. 
_start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt - Usage from Source ================= From 2cbac72055bb6c18c7ee5160fabd4d54d40db3a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 7 Jun 2018 14:50:58 +0300 Subject: [PATCH 134/291] added recursive, targets --- pyt/usage.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pyt/usage.py b/pyt/usage.py index e768d38a..a92f42a4 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -29,11 +29,11 @@ def valid_date(s): def _add_required_group(parser): required_group = parser.add_argument_group('required arguments') - required_group.add_argument( + '''required_group.add_argument( '-f', '--filepath', help='Path to the file that should be analysed.', type=str - ) + )''' def _add_optional_group(parser): @@ -92,9 +92,8 @@ def _add_optional_group(parser): help='do not skip lines with # nosec comments' ) optional_group.add_argument( - '-r', '--recursive', - help='Output filename.', - type=str + '-r', '--recursive', dest='recursive', + action='http://github.com/store_true', help='find and process files in subdirectories' ) optional_group.add_argument( '-x', '--exclude', @@ -102,7 +101,11 @@ def _add_optional_group(parser): action='http://github.com/store', default='', help='Separate files with commas' - ) + ) + optional_group.add_argument( + 'targets', metavar='targets', type=str, nargs='*', + help='source file(s) or directory(s) to be tested' + ) def _add_print_group(parser): print_group = parser.add_argument_group('print arguments') @@ -121,8 +124,8 @@ def _add_print_group(parser): def _check_required_and_mutually_exclusive_args(parser, args): - if args.filepath is None: - parser.error('The -f/--filepath argument is required') + if args.targets is None: + parser.error('The target argument is required') def parse_args(args): From 7875c8256a7f729dc52f13c9ed819a512d863c3e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 7 Jun 2018 14:51:33 +0300 Subject: [PATCH 135/291] update discover_files() --- pyt/__main__.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index ce9e0355..839d7758 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -30,17 +30,23 @@ ) -def discover_files(directory_path, excluded_files): - file_list = [] +def discover_files(targets, excluded_files, recursive=False): + file_list = list() + included_files = list() excluded_list = excluded_files.split(",") - for root, dirs, files in os.walk(directory_path): - for f in files: - fullpath = os.path.join(root, f) - if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: - file_list.append(fullpath) - - return(file_list) + for target in targets: + if os.path.isdir(target): + if recursive: + for root, dirs, files in os.walk(target): + for f in files: + fullpath = os.path.join(root, f) + if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: + included_files.append(fullpath) + else: + if targets not in excluded_list: + included_files.append(targets[0]) + return(included_files) def main(command_line_args=sys.argv[1:]): @@ -52,12 +58,16 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM - path = os.path.normpath(args.filepath) - directory_path = os.path.normpath(args.recursive) + + + targets = args.targets excluded_files = args.excluded_paths - test = discover_files(directory_path, excluded_files) #just for see files in directory + recursive = args.recursive + test = discover_files(targets, excluded_files, recursive) #just for see files in directory print(test) + path = os.path.normpath(args.filepath) + if args.ignore_nosec: nosec_lines = set() else: From ca0b2d764cdaab13ec2e1f98253b122bc0cf5819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 7 
Jun 2018 14:53:04 +0300 Subject: [PATCH 136/291] removed file_list --- pyt/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 839d7758..61319bb1 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -31,7 +31,6 @@ def discover_files(targets, excluded_files, recursive=False): - file_list = list() included_files = list() excluded_list = excluded_files.split(",") From 892073a30fc5fc03c9ffcf5fde0de97adc2c526d Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 8 Jun 2018 17:37:25 -0700 Subject: [PATCH 137/291] Update README.rst --- pyt/vulnerabilities/README.rst | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst index 180020d6..b8ff5d24 100644 --- a/pyt/vulnerabilities/README.rst +++ b/pyt/vulnerabilities/README.rst @@ -1,6 +1,24 @@ Coming soon. -There are a few different kinds of vulnerabilities +The first thing we do is `find all sources and sinks in the file`_, and then `loop through each pair of source and sink to see if a source reaches a sink`_. + +Once we obtain def-use chains, we `find all of the paths from source to sink`_. + + + +After we get each vulnerability chain, we see `how_vulnerable`_ it is + +There are a few different `vulnerability types`_ used in `how_vulnerable`_. + +.. _loop through each pair of source and sink to see if a source reaches a sink: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L452-L464 +.. _find all sources and sinks in the file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L29-L59 + +.. _find all of the paths from source to sink: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L397-L405 + +.. 
_vulnerability types: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py#L8-L12 + +.. _how_vulnerable: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L266-L323 + Regular Sanitised From b5d3641d6a1b139168cdb140883f5da23a9ddcc4 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 8 Jun 2018 17:38:36 -0700 Subject: [PATCH 138/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 3593d1e2..e9597cd1 100644 --- a/README.rst +++ b/README.rst @@ -55,7 +55,7 @@ How It Works Soon you will find a README.rst in every directory in the pyt folder, `start here`_. -.. _start here: https://github.com/python-security/pyt/tree/re_organize_code/pyt +.. _start here: https://github.com/python-security/pyt/tree/master/pyt Usage ===== From 6f76d6df97ebd9f2d96879613130e41cf90549aa Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 8 Jun 2018 17:59:04 -0700 Subject: [PATCH 139/291] Update README.rst --- pyt/vulnerabilities/README.rst | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst index b8ff5d24..998b59ad 100644 --- a/pyt/vulnerabilities/README.rst +++ b/pyt/vulnerabilities/README.rst @@ -1,4 +1,5 @@ -Coming soon. +`find_vulnerabilities`_ is what `__main__.py`_ calls, it takes a list of `CFGs`_ and returns a list of vulnerabilities. + The first thing we do is `find all sources and sinks in the file`_, and then `loop through each pair of source and sink to see if a source reaches a sink`_. @@ -10,6 +11,10 @@ After we get each vulnerability chain, we see `how_vulnerable`_ it is There are a few different `vulnerability types`_ used in `how_vulnerable`_. +.. _find_vulnerabilities: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L467-L502 +.. 
_\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py#L33-L106 +.. _CFGs: https://github.com/python-security/pyt/tree/re_organize_code/pyt/cfg + .. _loop through each pair of source and sink to see if a source reaches a sink: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L452-L464 .. _find all sources and sinks in the file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L29-L59 @@ -20,9 +25,34 @@ There are a few different `vulnerability types`_ used in `how_vulnerable`_. .. _how_vulnerable: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L266-L323 +Types of Vulnerabilities +======================== + Regular +and example code and output + Sanitised + +.. code-block:: python + File: examples/vulnerable_code/XSS_sanitised.py + > User input at line 7, source "request.args.get(": + ~call_1 = ret_request.args.get('param', 'not set') + Reassigned in: + File: examples/vulnerable_code/XSS_sanitised.py + > Line 7: param = ~call_1 + File: examples/vulnerable_code/XSS_sanitised.py + > Line 9: ~call_2 = ret_Markup.escape(param) + File: examples/vulnerable_code/XSS_sanitised.py + > Line 9: param = ~call_2 + File: examples/vulnerable_code/XSS_sanitised.py + > reaches line 12, sink "replace(": + ~call_5 = ret_html.replace('{{ param }}', param) + This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) + +and example code and output + Unknown +and example code and output How we find secondary nodes From feaf94b9fb79f2298fa29a46357ba2bb164851a6 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 8 Jun 2018 17:59:34 -0700 Subject: [PATCH 140/291] Update README.rst --- pyt/vulnerabilities/README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst index 998b59ad..48ca316e 100644 --- 
a/pyt/vulnerabilities/README.rst +++ b/pyt/vulnerabilities/README.rst @@ -60,3 +60,4 @@ How we find secondary nodes How we find sources/sinks How def-use chains are used +h From ae8c599f536bdf5cd8a14262a00eb5e8f3fba6b4 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 8 Jun 2018 18:02:39 -0700 Subject: [PATCH 141/291] Update README.rst --- pyt/vulnerabilities/README.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst index 48ca316e..1dde3857 100644 --- a/pyt/vulnerabilities/README.rst +++ b/pyt/vulnerabilities/README.rst @@ -34,6 +34,27 @@ and example code and output Sanitised .. code-block:: python + :linenos: + + from flask import Flask, request, make_response, Markup + + app = Flask(__name__) + + @app.route('/XSS_param', methods =['GET']) + def XSS1(): + param = request.args.get('param', 'not set') + + param = Markup.escape(param) + + html = open('templates/XSS_param.html').read() + resp = make_response(html.replace('{{ param }}', param)) + return resp + + if __name__ == '__main__': + app.run(debug= True) + +.. code-block:: python + File: examples/vulnerable_code/XSS_sanitised.py > User input at line 7, source "request.args.get(": ~call_1 = ret_request.args.get('param', 'not set') From 36ed86a6992187bc0175fbbe9fca3204d66b38ce Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 8 Jun 2018 18:27:23 -0700 Subject: [PATCH 142/291] Update README.rst --- pyt/vulnerabilities/README.rst | 59 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/pyt/vulnerabilities/README.rst b/pyt/vulnerabilities/README.rst index 1dde3857..bf8c4f0b 100644 --- a/pyt/vulnerabilities/README.rst +++ b/pyt/vulnerabilities/README.rst @@ -24,50 +24,46 @@ There are a few different `vulnerability types`_ used in `how_vulnerable`_. .. 
_how_vulnerable: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L266-L323 +Configuration +============= +The hard-coded list of sources and sinks can be found in the `vulnerability_definitions`_ folder, currently `all_trigger_words.pyt`_ is used by default. + +.. _vulnerability_definitions: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerability_definitions +.. _all_trigger_words.pyt: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerability_definitions/all_trigger_words.pyt Types of Vulnerabilities ======================== -Regular -and example code and output +There are 3 kinds of vulnerabilities reported by PyT whose classes are defined in `vulnerability_helper.py`_: `regular`_, `sanitised`_ and `unknown`_. We report a `sanitised`_ vulnerability when there is a known sanitiser between the source and sink, with `confidence when the sanitiser is an assignment`_ and with `uncertainty if it is potentially sanitised by an if statement`_. Here is an example: -Sanitised +.. _confidence when the sanitiser is an assignment: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L293 +.. _uncertainty if it is potentially sanitised by an if statement: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L394 .. 
code-block:: python - :linenos: - - from flask import Flask, request, make_response, Markup - - app = Flask(__name__) - @app.route('/XSS_param', methods =['GET']) - def XSS1(): - param = request.args.get('param', 'not set') - - param = Markup.escape(param) - - html = open('templates/XSS_param.html').read() - resp = make_response(html.replace('{{ param }}', param)) - return resp - - if __name__ == '__main__': - app.run(debug= True) + 5 @app.route('/XSS_param', methods =['GET']) + 6 def XSS1(): + 7 param = request.args.get('param', 'not set') + 8 safe_param = Markup.escape(param) + 9 html = open('templates/XSS_param.html').read() + 10 resp = make_response(html.replace('{{ param }}', safe_param)) + 11 return resp .. code-block:: python File: examples/vulnerable_code/XSS_sanitised.py > User input at line 7, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_request.args.get('param', 'not set') Reassigned in: - File: examples/vulnerable_code/XSS_sanitised.py - > Line 7: param = ~call_1 - File: examples/vulnerable_code/XSS_sanitised.py - > Line 9: ~call_2 = ret_Markup.escape(param) - File: examples/vulnerable_code/XSS_sanitised.py - > Line 9: param = ~call_2 + File: examples/vulnerable_code/XSS_sanitised.py + > Line 7: param = ~call_1 + File: examples/vulnerable_code/XSS_sanitised.py + > Line 8: ~call_2 = ret_Markup.escape(param) + File: examples/vulnerable_code/XSS_sanitised.py + > Line 8: safe_param = ~call_2 File: examples/vulnerable_code/XSS_sanitised.py - > reaches line 12, sink "replace(": - ~call_5 = ret_html.replace('{{ param }}', param) + > reaches line 10, sink "replace(": + ~call_5 = ret_html.replace('{{ param }}', safe_param) This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) and example code and output @@ -82,3 +78,8 @@ How we find sources/sinks How def-use chains are used h + +.. 
_vulnerability_helper.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py +.. _regular: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py#L42-L91 +.. _sanitised: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py#L94-L119 +.. _unknown: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerability_helper.py#L122-L142 From c1d60e24aadab70f591831f79d9d92c110ff1fd4 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 9 Jun 2018 13:36:09 -0700 Subject: [PATCH 143/291] Update README.rst --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index e9597cd1..af180b63 100644 --- a/README.rst +++ b/README.rst @@ -43,6 +43,7 @@ Install .. code-block:: python pip install python-taint + ✨🍰✨ PyT can also be installed from source. To do so, clone the repo, and then run: From b896da14b19b6ce1654df76bb7008a5c614d9e94 Mon Sep 17 00:00:00 2001 From: cclauss Date: Sat, 9 Jun 2018 23:47:38 +0200 Subject: [PATCH 144/291] --exclude=examples --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 100ed8c2..079eef72 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,9 +6,9 @@ install: - pip install codeclimate-test-reporter flake8 before_script: # stop the build if there are Python syntax errors or undefined names - - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics + - flake8 . --count --exclude=examples --select=E901,E999,F821,F822,F823 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - flake8 . 
--count --exclude=examples --exit-zero --max-complexity=10 --max-line-length=127 --statistics script: - python -m tests - coverage run -m tests From d9db9dda1046438c91154e0bd3a88231e73334f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 10 Jun 2018 15:06:05 +0300 Subject: [PATCH 145/291] "targets" must be required --- pyt/usage.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pyt/usage.py b/pyt/usage.py index a92f42a4..0892536e 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -29,11 +29,10 @@ def valid_date(s): def _add_required_group(parser): required_group = parser.add_argument_group('required arguments') - '''required_group.add_argument( - '-f', '--filepath', - help='Path to the file that should be analysed.', - type=str - )''' + required_group.add_argument( + 'targets', metavar='targets', type=str, nargs='*', + help='source file(s) or directory(s) to be tested' + ) def _add_optional_group(parser): @@ -102,10 +101,7 @@ def _add_optional_group(parser): default='', help='Separate files with commas' ) - optional_group.add_argument( - 'targets', metavar='targets', type=str, nargs='*', - help='source file(s) or directory(s) to be tested' - ) + def _add_print_group(parser): print_group = parser.add_argument_group('print arguments') @@ -125,7 +121,7 @@ def _add_print_group(parser): def _check_required_and_mutually_exclusive_args(parser, args): if args.targets is None: - parser.error('The target argument is required') + parser.error('The targets argument is required') def parse_args(args): From c35ae8179f7e1d22e9b96b0040895b6c85a4bad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 10 Jun 2018 15:30:27 +0300 Subject: [PATCH 146/291] created loop for discover_files() --- pyt/__main__.py | 124 ++++++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 61319bb1..8c6b4cb4 100644 --- 
a/pyt/__main__.py +++ b/pyt/__main__.py @@ -43,7 +43,7 @@ def discover_files(targets, excluded_files, recursive=False): if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: included_files.append(fullpath) else: - if targets not in excluded_list: + if target not in excluded_list: included_files.append(targets[0]) return(included_files) @@ -57,75 +57,73 @@ def main(command_line_args=sys.argv[1:]): elif args.trim_reassigned_in: ui_mode = UImode.TRIM + files = discover_files( + args.targets, + args.excluded_paths, + args.recursive + ) + for path in files: + print(path) + if args.ignore_nosec: + nosec_lines = set() + else: + file = open(path, 'r') + lines = file.readlines() + nosec_lines = set( + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' in line or '# nosec' in line + ) + + if args.project_root: + directory = os.path.normpath(args.project_root) + else: + directory = os.path.dirname(path) + project_modules = get_modules(directory) + local_modules = get_directory_modules(directory) - targets = args.targets - excluded_files = args.excluded_paths - recursive = args.recursive - test = discover_files(targets, excluded_files, recursive) #just for see files in directory - print(test) - - path = os.path.normpath(args.filepath) + tree = generate_ast(path) - if args.ignore_nosec: - nosec_lines = set() - else: - file = open(path, 'r') - lines = file.readlines() - nosec_lines = set( - lineno for - (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line + cfg = make_cfg( + tree, + project_modules, + local_modules, + path + ) + cfg_list = [cfg] + framework_route_criteria = is_flask_route_function + if args.adaptor: + if args.adaptor.lower().startswith('e'): + framework_route_criteria = is_function + elif args.adaptor.lower().startswith('p'): + framework_route_criteria = is_function_without_leading_ + elif args.adaptor.lower().startswith('d'): + framework_route_criteria = 
is_django_view_function + # Add all the route functions to the cfg_list + FrameworkAdaptor( + cfg_list, + project_modules, + local_modules, + framework_route_criteria ) - if args.project_root: - directory = os.path.normpath(args.project_root) - else: - directory = os.path.dirname(path) - project_modules = get_modules(directory) - local_modules = get_directory_modules(directory) - - tree = generate_ast(path) - - cfg = make_cfg( - tree, - project_modules, - local_modules, - path - ) - cfg_list = [cfg] - framework_route_criteria = is_flask_route_function - if args.adaptor: - if args.adaptor.lower().startswith('e'): - framework_route_criteria = is_function - elif args.adaptor.lower().startswith('p'): - framework_route_criteria = is_function_without_leading_ - elif args.adaptor.lower().startswith('d'): - framework_route_criteria = is_django_view_function - # Add all the route functions to the cfg_list - FrameworkAdaptor( - cfg_list, - project_modules, - local_modules, - framework_route_criteria - ) - - initialize_constraint_table(cfg_list) - analyse(cfg_list) - vulnerabilities = find_vulnerabilities( - cfg_list, - ui_mode, - args.blackbox_mapping_file, - args.trigger_word_file, - nosec_lines - ) - - if args.baseline: - vulnerabilities = get_vulnerabilities_not_in_baseline( - vulnerabilities, - args.baseline + initialize_constraint_table(cfg_list) + analyse(cfg_list) + vulnerabilities = find_vulnerabilities( + cfg_list, + ui_mode, + args.blackbox_mapping_file, + args.trigger_word_file, + nosec_lines ) + if args.baseline: + vulnerabilities = get_vulnerabilities_not_in_baseline( + vulnerabilities, + args.baseline + ) + if args.json: json.report(vulnerabilities, args.output_file) else: From 9c54d8ccbd1f5f1332836a91482faf31ee2b2a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 10 Jun 2018 15:33:38 +0300 Subject: [PATCH 147/291] new params --- tests/usage_test.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git 
a/tests/usage_test.py b/tests/usage_test.py index cae390e5..4883c31d 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -25,14 +25,14 @@ def test_no_args(self): self.maxDiff = None - EXPECTED = """usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] + EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-trim] [-i] + [-r] [-x EXCLUDED_PATHS] [-trim] [-i] + [targets [targets ...]] required arguments: - -f FILEPATH, --filepath FILEPATH - Path to the file that should be analysed. + targets source file(s) or directory(s) to be tested optional arguments: -a ADAPTOR, --adaptor ADAPTOR @@ -52,6 +52,9 @@ def test_no_args(self): -o OUTPUT_FILE, --output OUTPUT_FILE write report to filename --ignore-nosec do not skip lines with # nosec comments + -r, --recursive find and process files in subdirectories + -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS + Separate files with commas print arguments: -trim, --trim-reassigned-in @@ -62,7 +65,7 @@ def test_no_args(self): self.assertEqual(stdout.getvalue(), EXPECTED) - def test_valid_args_but_no_filepath(self): + '''def test_valid_args_but_no_filepath(self): with self.assertRaises(SystemExit): with capture_sys_output() as (_, stderr): parse_args(['-j']) @@ -73,7 +76,7 @@ def test_valid_args_but_no_filepath(self): [-trim] [-i] python -m pyt: error: The -f/--filepath argument is required\n""" - self.assertEqual(stderr.getvalue(), EXPECTED) + self.assertEqual(stderr.getvalue(), EXPECTED)''' # def test_using_both_mutually_exclusive_args(self): # with self.assertRaises(SystemExit): @@ -89,7 +92,7 @@ def test_valid_args_but_no_filepath(self): def test_normal_usage(self): with capture_sys_output() as (stdout, stderr): - parse_args(['-f', 'foo.py']) + parse_args(['foo.py']) self.assertEqual(stdout.getvalue(), '') self.assertEqual(stderr.getvalue(), '') From 
7e3fa0e81448868090ad1c753325ddeb423ae528 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 10 Jun 2018 14:11:43 -0700 Subject: [PATCH 148/291] Update README.rst --- pyt/analysis/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst index e1ba9ef1..9a12388f 100644 --- a/pyt/analysis/README.rst +++ b/pyt/analysis/README.rst @@ -38,7 +38,7 @@ How does a definition reach? ============================ After we know that a definition reaches a use that we are interested in, -we make what use called `definition-use chains`_ to figure out how the definition +we use what are called `definition-use chains`_ to figure out how the definition reaches the use. This is necessary because there may be more than one path from the definition to the use. Here is the code from `definition_chains.py`_: From 052c850ebc41154ea79377bf2b1dd7591c9238c9 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 10 Jun 2018 14:16:32 -0700 Subject: [PATCH 149/291] Update README.rst --- pyt/analysis/README.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyt/analysis/README.rst b/pyt/analysis/README.rst index 9a12388f..58f96d4f 100644 --- a/pyt/analysis/README.rst +++ b/pyt/analysis/README.rst @@ -18,7 +18,7 @@ a `reassignment check`_. # Get previous assignments of cfg_node.left_hand_side and remove them from JOIN arrow_result = self.arrow(JOIN, cfg_node.left_hand_side) -As an example, +We do this because, e.g. .. code-block:: python @@ -38,9 +38,9 @@ How does a definition reach? ============================ After we know that a definition reaches a use that we are interested in, -we use what are called `definition-use chains`_ to figure out how the definition -reaches the use. This is necessary because there may be more than one path from -the definition to the use. 
Here is the code from `definition_chains.py`_: +we use what are called `definition-use chains`_ to figure out how definitions +reach their uses. This is necessary because there may be multiple paths from +definition to use. Here is how we create `definition_chains`_: .. code-block:: python @@ -64,20 +64,20 @@ the definition to the use. Here is the code from `definition_chains.py`_: return def_use .. _definition-use chains: https://en.wikipedia.org/wiki/Use-define_chain -.. _definition_chains.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/analysis/definition_chains.py#L16-L33 +.. _definition_chains: https://github.com/python-security/pyt/blob/re_organize_code/pyt/analysis/definition_chains.py#L16-L33 Additional details ================== -This folder probably will not change at all for the lifetime of the project, +This folder will probably not change for the lifetime of the project, unless we were to implement more advanced analyses like `solving string -constraints`_ or doing `alias analysis`_. Right now and in the foreseeable -future there are more pressing concerns, like handling web frameworks +constraints`_ or doing `alias analysis`_. Right now there are more +pressing concerns, like handling web frameworks and handling all AST node types in the `CFG construction`_. Stefan and Bruno like the `Schwartzbach notes`_, as you will see in some comments. -But looking these two algorithms up will yield countless results, my favorite is +But looking up these two algorithms will yield countless results, my favorite is this `amazing guy from YouTube`_. 
From 7854cb45f35481278e11cb7726422fcb51da3c0b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 13 Jun 2018 19:16:22 -0700 Subject: [PATCH 150/291] [flake8] Just fix flake8 errors for PR #114 --- pyt/__main__.py | 2 +- pyt/cfg/stmt_visitor.py | 2 +- pyt/core/ast_helper.py | 2 +- pyt/core/project_handler.py | 20 ++- .../reaching_definitions_taint_test.py | 62 +++++--- tests/cfg/cfg_test.py | 147 +++++++++++------- .../vulnerabilities_across_files_test.py | 7 +- tests/vulnerabilities/vulnerabilities_test.py | 42 ++--- 8 files changed, 175 insertions(+), 109 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index b2302113..52275da6 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -30,7 +30,7 @@ ) -def main(command_line_args=sys.argv[1:]): +def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) ui_mode = UImode.NORMAL diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 06a985e5..c10548c0 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -717,7 +717,7 @@ def append_node(self, node): self.nodes.append(node) return node - def add_module( + def add_module( # noqa: C901 self, module, module_or_package_name, diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index e741ac50..17013128 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -15,7 +15,7 @@ def convert_to_3(path): # pragma: no cover try: print('##### Trying to convert file to Python 3. #####') subprocess.call(['2to3', '-w', path]) - except: + except subprocess.SubprocessError: print('Check if 2to3 is installed. 
' 'https://docs.python.org/2/library/2to3.html') exit(1) diff --git a/pyt/core/project_handler.py b/pyt/core/project_handler.py index 7d50b1e8..4a16ff96 100644 --- a/pyt/core/project_handler.py +++ b/pyt/core/project_handler.py @@ -40,12 +40,26 @@ def get_modules(path): for root, directories, filenames in os.walk(path): for filename in filenames: if is_python_file(filename): - directory = os.path.dirname(os.path.realpath(os.path.join(root, filename))).split(module_root)[-1].replace(os.sep, '.') + directory = os.path.dirname( + os.path.realpath( + os.path.join( + root, + filename + ) + ) + ).split(module_root)[-1].replace( + os.sep, # e.g. '/' + '.' + ) directory = directory.replace('.', '', 1) if directory: - modules.append(('.'.join((module_root, directory, filename.replace('.py', ''))), os.path.join(root, filename))) + modules.append( + ('.'.join((module_root, directory, filename.replace('.py', ''))), os.path.join(root, filename)) + ) else: - modules.append(('.'.join((module_root, filename.replace('.py', ''))), os.path.join(root, filename))) + modules.append( + ('.'.join((module_root, filename.replace('.py', ''))), os.path.join(root, filename)) + ) return modules diff --git a/tests/analysis/reaching_definitions_taint_test.py b/tests/analysis/reaching_definitions_taint_test.py index 20f50b5a..3c34508c 100644 --- a/tests/analysis/reaching_definitions_taint_test.py +++ b/tests/analysis/reaching_definitions_taint_test.py @@ -13,8 +13,10 @@ def test_linear_program(self): "Label: ~call_1 = ret_input(): Label: ~call_1 = ret_input()", "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", "Label: y = x - 1: Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, Label: ~call_1 = ret_input()" + """Label: ~call_2 
= ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x - 1, + Label: x = ~call_1, Label: ~call_1 = ret_input()""", + """Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x - 1, Label: x = ~call_1, + Label: ~call_1 = ret_input()""" ] i = 0 for k, v in constraint_table.items(): @@ -31,8 +33,10 @@ def test_if_program(self): "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", "Label: if x > 0:: Label: x = ~call_1, Label: ~call_1 = ret_input()", "Label: y = x + 1: Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_input()" + """Label: ~call_2 = ret_print(x): Label: ~call_2 = ret_print(x), Label: y = x + 1, + Label: x = ~call_1, Label: ~call_1 = ret_input()""", + """Label: Exit module: Label: ~call_2 = ret_print(x), Label: y = x + 1, Label: x = ~call_1, + Label: ~call_1 = ret_input()""" ] i = 0 for k, v in constraint_table.items(): @@ -49,16 +53,28 @@ def test_example(self): "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_input()", "Label: ~call_2 = ret_int(x): Label: ~call_2 = ret_int(x), Label: x = ~call_1, Label: ~call_1 = ret_input()", "Label: x = ~call_2: Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: while x > 1:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: y = x / 2: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: if y > 3:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = 
ret_int(x), Label: ~call_1 = ret_input()", - "Label: x = x - y: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: z = x - 4: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: if z > 0:: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: x = x / 2: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: z = z - 1: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()", - "Label: Exit module: Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()" + """Label: while x > 1:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, + Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: y = x / 2: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, + Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: if y > 3:: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, + Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + 
"""Label: x = x - y: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, + Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: z = x - 4: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, + Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: if z > 0:: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, + Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: x = x / 2: Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, + Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: z = z - 1: Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, Label: x = x - y, Label: y = x / 2, + Label: x = ~call_2, Label: ~call_2 = ret_int(x), Label: ~call_1 = ret_input()""", + """Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, + Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), + Label: ~call_1 = ret_input()""", + """Label: Exit module: Label: ~call_3 = ret_print(x), Label: z = z - 1, Label: x = x / 2, Label: z = x - 4, + Label: x = x - y, Label: y = x / 2, Label: x = ~call_2, Label: ~call_2 = ret_int(x), + Label: ~call_1 = ret_input()""" ] i = 0 for k, v in constraint_table.items(): @@ -88,13 +104,19 @@ def test_while(self): "Label: ~call_2 = ret_input(): Label: ~call_2 = ret_input()", "Label: ~call_1 = ret_int(~call_2): Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", "Label: x = ~call_1: Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: while x < 10:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input(", - "Label: x = x + 1: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 
= ret_input()", - "Label: if x == 5:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: BreakNode: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", + """Label: while x < 10:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), + Label: ~call_2 = ret_input(""", + """Label: x = x + 1: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), + Label: ~call_2 = ret_input()""", + """Label: if x == 5:: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), + Label: ~call_2 = ret_input()""", + """Label: BreakNode: Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), + Label: ~call_2 = ret_input()""", "Label: x = 6: Label: x = 6, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()", - "Label: Exit module: Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()" + """Label: ~call_3 = ret_print(x): Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, + Label: x = ~call_1, Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()""", + """Label: Exit module: Label: ~call_3 = ret_print(x), Label: x = 6, Label: x = x + 1, Label: x = ~call_1, + Label: ~call_1 = ret_int(~call_2), Label: ~call_2 = ret_input()""" ] i = 0 for k, v in constraint_table.items(): diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index f4862487..a42ac4e0 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -82,14 +82,16 @@ def test_for_complete(self): self.assertEqual(self.cfg.nodes[else_body_2].label, '~call_3 = ret_print(y)') self.assertEqual(self.cfg.nodes[next_node].label, 'x = 3') - 
self.assertInCfg([(for_node, entry), - (body_1, for_node), - (else_body_1, for_node), - (body_2, body_1), - (for_node, body_2), - (else_body_2, else_body_1), - (next_node, else_body_2), - (exit_node, next_node)]) + self.assertInCfg([ + (for_node, entry), + (body_1, for_node), + (else_body_1, for_node), + (body_2, body_1), + (for_node, body_2), + (else_body_2, else_body_1), + (next_node, else_body_2), + (exit_node, next_node) + ]) def test_for_no_orelse(self): self.cfg_create_from_file('examples/example_inputs/for_no_orelse.py') @@ -105,7 +107,14 @@ def test_for_no_orelse(self): next_node = 4 exit_node = 5 - self.assertInCfg([(for_node, entry), (body_1, for_node), (body_2, body_1), (for_node, body_2), (next_node, for_node), (exit_node, next_node)]) + self.assertInCfg([ + (for_node, entry), + (body_1, for_node), + (body_2, body_1), + (for_node, body_2), + (next_node, for_node), + (exit_node, next_node) + ]) def test_for_tuple_target(self): self.cfg_create_from_file('examples/example_inputs/for_tuple_target.py') @@ -307,7 +316,7 @@ def test_try_orelse_with_no_variables_to_save_and_no_args(self): self.nodes = self.cfg_list_to_dict(self.cfg.nodes) self.assert_length(self.cfg.nodes, expected_length=13) - + entry = 0 try_ = 1 print_a5 = 2 @@ -323,21 +332,21 @@ def test_try_orelse_with_no_variables_to_save_and_no_args(self): _exit = 12 self.assertInCfg([ - self.connected(entry, try_), - self.connected(try_, print_a5), - self.connected(print_a5, except_im), - self.connected(print_a5, function_entry), - self.connected(print_a5, print_good), - self.connected(except_im, print_wagyu), - self.connected(print_wagyu, print_good), - self.connected(function_entry, ret_subprocess_call), - self.connected(ret_subprocess_call, ret_does_this_kill_us_4), - self.connected(ret_does_this_kill_us_4, exit_does_this_kill_us), - self.connected(exit_does_this_kill_us, ret_does_this_kill_us_3), - self.connected(ret_does_this_kill_us_3, print_so), - self.connected(print_so, print_good), - 
self.connected(print_good, _exit) - ]) + self.connected(entry, try_), + self.connected(try_, print_a5), + self.connected(print_a5, except_im), + self.connected(print_a5, function_entry), + self.connected(print_a5, print_good), + self.connected(except_im, print_wagyu), + self.connected(print_wagyu, print_good), + self.connected(function_entry, ret_subprocess_call), + self.connected(ret_subprocess_call, ret_does_this_kill_us_4), + self.connected(ret_does_this_kill_us_4, exit_does_this_kill_us), + self.connected(exit_does_this_kill_us, ret_does_this_kill_us_3), + self.connected(ret_does_this_kill_us_3, print_so), + self.connected(print_so, print_good), + self.connected(print_good, _exit) + ]) def test_final(self): self.cfg_create_from_file('examples/example_inputs/try_final.py') @@ -354,15 +363,17 @@ def test_final(self): print_final = 5 _exit = 6 - self.assertInCfg([self.connected(entry, try_), - self.connected(try_, try_body), - self.connected(try_body, except_im), - self.connected(try_body, print_final), - self.connected(try_body, _exit), - self.connected(except_im, except_im_body_1), - self.connected(except_im_body_1, _exit), - self.connected(except_im_body_1, print_final), - self.connected(print_final, _exit)]) + self.assertInCfg([ + self.connected(entry, try_), + self.connected(try_, try_body), + self.connected(try_body, except_im), + self.connected(try_body, print_final), + self.connected(try_body, _exit), + self.connected(except_im, except_im_body_1), + self.connected(except_im_body_1, _exit), + self.connected(except_im_body_1, print_final), + self.connected(print_final, _exit) + ]) class CFGIfTest(CFGBaseTestCase): @@ -714,10 +725,8 @@ def test_multiple_assignment(self): self.assert_length(self.cfg.nodes, expected_length=4) - # start_node = self.cfg.nodes[0] assign_y = self.cfg.nodes[1] assign_x = self.cfg.nodes[2] - # exit_node = self.cfg.nodes[-1] self.assertEqual(assign_x.label, 'x = 5') self.assertEqual(assign_y.label, 'y = 5') @@ -731,9 +740,13 @@ def 
test_assign_list_comprehension(self): call = self.cfg.nodes[1] self.assertEqual(call.label, "~call_1 = ret_''.join((x.n for x in range(16)))") - l = zip(range(1, length), range(length)) - - self.assertInCfg(list(l)) + self.assertInCfg( + list( + zip( + range(1, length), range(length) + ) + ) + ) def test_assignment_tuple_value(self): self.cfg_create_from_file('examples/example_inputs/assignment_tuple_value.py') @@ -742,7 +755,6 @@ def test_assignment_tuple_value(self): start_node = 0 node = 1 exit_node = 2 - # print(self.cfg) self.assertInCfg([(node, start_node), (exit_node, node)]) @@ -867,19 +879,21 @@ def test_function_parameters(self): restore_actual_y = 12 _exit = 13 - self.assertInCfg([self.connected(entry, input_call), - self.connected(input_call, y_assignment), - self.connected(y_assignment, save_y), - self.connected(save_y, save_actual_y), - self.connected(save_actual_y, bar_local_y), - self.connected(bar_local_y, entry_bar), - self.connected(entry_bar, another_input_call), - self.connected(another_input_call, bar_y_assignment), - self.connected(bar_y_assignment, bar_print_y), - self.connected(bar_print_y, bar_print_x), - self.connected(bar_print_x, exit_bar), - self.connected(exit_bar, restore_actual_y), - self.connected(restore_actual_y, _exit)]) + self.assertInCfg([ + self.connected(entry, input_call), + self.connected(input_call, y_assignment), + self.connected(y_assignment, save_y), + self.connected(save_y, save_actual_y), + self.connected(save_actual_y, bar_local_y), + self.connected(bar_local_y, entry_bar), + self.connected(entry_bar, another_input_call), + self.connected(another_input_call, bar_y_assignment), + self.connected(bar_y_assignment, bar_print_y), + self.connected(bar_print_y, bar_print_x), + self.connected(bar_print_x, exit_bar), + self.connected(exit_bar, restore_actual_y), + self.connected(restore_actual_y, _exit) + ]) def test_function_with_return(self): path = 'examples/example_inputs/simple_function_with_return.py' @@ -887,8 +901,13 
@@ def test_function_with_return(self): self.assert_length(self.cfg.nodes, expected_length=19) - l = zip(range(1, len(self.cfg.nodes)), range(len(self.cfg.nodes))) - self.assertInCfg(list(l)) + self.assertInCfg( + list( + zip( + range(1, len(self.cfg.nodes)), range(len(self.cfg.nodes)) + ) + ) + ) def test_function_multiple_return(self): path = 'examples/example_inputs/function_with_multiple_return.py' @@ -1238,9 +1257,14 @@ def test_multiple_parameters(self): length = len(self.cfg.nodes) self.assertEqual(length, 21) - l = zip(range(1, length), range(length)) - self.assertInCfg(list(l)) + self.assertInCfg( + list( + zip( + range(1, length), range(length) + ) + ) + ) def test_call_on_call(self): path = 'examples/example_inputs/call_on_call.py' @@ -1258,8 +1282,13 @@ def test_call_with_attribute(self): call = self.cfg.nodes[2] self.assertEqual(call.label, "~call_1 = ret_request.args.get('param', 'not set')") - l = zip(range(1, length), range(length)) - self.assertInCfg(list(l)) + self.assertInCfg( + list( + zip( + range(1, length), range(length) + ) + ) + ) def test_call_with_attribute_line_numbers(self): call = self.cfg.nodes[2] diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index d8bd3840..c0985723 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -56,7 +56,7 @@ def test_find_vulnerabilities_absolute_from_file_command_injection_2(self): self.assert_length(vulnerabilities, expected_length=1) def test_no_false_positive_absolute_from_file_command_injection_3(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py') + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py') # noqa: E501 self.assert_length(vulnerabilities, 
expected_length=0) def test_blackbox_library_call(self): @@ -79,7 +79,8 @@ def test_blackbox_library_call(self): File: examples/vulnerable_code_across_files/blackbox_library_call.py > reaches line 17, sink "subprocess.call(": ~call_3 = ret_subprocess.call(hey, shell=True) - This vulnerability is unknown due to: Label: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') + This vulnerability is unknown due to: + Label: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) @@ -295,5 +296,5 @@ def test_find_vulnerabilities_import_file_command_injection_2(self): self.assert_length(vulnerabilities, expected_length=1) def test_no_false_positive_import_file_command_injection_3(self): - vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py') + vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py') # noqa: E501 self.assert_length(vulnerabilities, expected_length=0) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 5e40a60f..4c0dd2eb 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -40,31 +40,31 @@ def test_parse(self): self.assert_length(definitions.sinks[1][1], expected_length=3) def test_parse_section(self): - l = list(trigger_definitions_parser.parse_section(iter(['get']))) - self.assert_length(l, expected_length=1) - self.assertEqual(l[0][0], 'get') - self.assertEqual(l[0][1], list()) - - l = list(trigger_definitions_parser.parse_section(iter(['get', 'get -> a, b, c d s aq a']))) - self.assert_length(l, expected_length=2) - self.assertEqual(l[0][0], 'get') - self.assertEqual(l[1][0], 'get') - self.assertEqual(l[1][1], ['a', 'b', 'c d s aq 
a']) - self.assert_length(l[1][1], expected_length=3) + list_ = list(trigger_definitions_parser.parse_section(iter(['get']))) + self.assert_length(list_, expected_length=1) + self.assertEqual(list_[0][0], 'get') + self.assertEqual(list_[0][1], list()) + + list_ = list(trigger_definitions_parser.parse_section(iter(['get', 'get -> a, b, c d s aq a']))) + self.assert_length(list_, expected_length=2) + self.assertEqual(list_[0][0], 'get') + self.assertEqual(list_[1][0], 'get') + self.assertEqual(list_[1][1], ['a', 'b', 'c d s aq a']) + self.assert_length(list_[1][1], expected_length=3) def test_label_contains(self): cfg_node = Node('label', None, line_number=None, path=None) trigger_words = [('get', [])] - l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(l, expected_length=0) + list_ = list(vulnerabilities.label_contains(cfg_node, trigger_words)) + self.assert_length(list_, expected_length=0) cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) trigger_words = [('get', []), ('request', [])] - l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(l, expected_length=2) + list_ = list(vulnerabilities.label_contains(cfg_node, trigger_words)) + self.assert_length(list_, expected_length=2) - trigger_node_1 = l[0] - trigger_node_2 = l[1] + trigger_node_1 = list_[0] + trigger_node_2 = list_[1] self.assertEqual(trigger_node_1.trigger_word, 'get') self.assertEqual(trigger_node_1.cfg_node, cfg_node) self.assertEqual(trigger_node_2.trigger_word, 'request') @@ -72,8 +72,8 @@ def test_label_contains(self): cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) trigger_words = [('get', []), ('get', [])] - l = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(l, expected_length=2) + list_ = list(vulnerabilities.label_contains(cfg_node, trigger_words)) + self.assert_length(list_, expected_length=2) def test_find_triggers(self): 
self.cfg_create_from_file('examples/vulnerable_code/XSS.py') @@ -85,12 +85,12 @@ def test_find_triggers(self): XSS1 = cfg_list[1] trigger_words = [('get', [])] - l = vulnerabilities.find_triggers( + list_ = vulnerabilities.find_triggers( XSS1.nodes, trigger_words, nosec_lines=set() ) - self.assert_length(l, expected_length=1) + self.assert_length(list_, expected_length=1) def test_find_sanitiser_nodes(self): cfg_node = Node(None, None, line_number=None, path=None) From 40c0f8f2864b72e88444d9a5aab35b84ce98ac07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 16 Jun 2018 18:05:42 +0300 Subject: [PATCH 151/291] Update __main__.py --- pyt/__main__.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 8c6b4cb4..4b629b28 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -45,7 +45,7 @@ def discover_files(targets, excluded_files, recursive=False): else: if target not in excluded_list: included_files.append(targets[0]) - return(included_files) + return included_files def main(command_line_args=sys.argv[1:]): @@ -62,7 +62,7 @@ def main(command_line_args=sys.argv[1:]): args.excluded_paths, args.recursive ) - + vulnerabilities = list() for path in files: print(path) if args.ignore_nosec: @@ -82,7 +82,6 @@ def main(command_line_args=sys.argv[1:]): directory = os.path.dirname(path) project_modules = get_modules(directory) local_modules = get_directory_modules(directory) - tree = generate_ast(path) cfg = make_cfg( @@ -110,19 +109,14 @@ def main(command_line_args=sys.argv[1:]): initialize_constraint_table(cfg_list) analyse(cfg_list) - vulnerabilities = find_vulnerabilities( + vulnerabilities.append(find_vulnerabilities( cfg_list, ui_mode, args.blackbox_mapping_file, args.trigger_word_file, nosec_lines - ) + )) - if args.baseline: - vulnerabilities = get_vulnerabilities_not_in_baseline( - vulnerabilities, - args.baseline - ) if args.json: json.report(vulnerabilities, 
args.output_file) From 42759f0dfebe0d12c490d8bad17743578fe66608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 16 Jun 2018 18:07:58 +0300 Subject: [PATCH 152/291] Update __main__.py --- pyt/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 4b629b28..ad0cce7b 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -48,7 +48,7 @@ def discover_files(targets, excluded_files, recursive=False): return included_files -def main(command_line_args=sys.argv[1:]): +def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) ui_mode = UImode.NORMAL From ba80c3a70cb3c8c5e949c36a8c90b8eba9735c90 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 17 Jun 2018 15:38:15 -0700 Subject: [PATCH 153/291] Filled in the web_frameworks/README.rst --- pyt/web_frameworks/README.rst | 54 +++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/pyt/web_frameworks/README.rst b/pyt/web_frameworks/README.rst index aa1b121b..7574aced 100644 --- a/pyt/web_frameworks/README.rst +++ b/pyt/web_frameworks/README.rst @@ -1,5 +1,53 @@ -Coming soon. +This code determines which functions have their arguments marked at tainted, for example by default the framework adaptor is Flask, so +.. code-block:: python -Web frameworks -Sorry state of affairs + @app.route('/') + def ito_en(image): + +will have arguments marked as tainted, whereas + +.. code-block:: python + + def tea(request, param): + +will not. (The ``--adaptor D`` option, for Django, would mark the 2nd functions' arguments as tainted and not the first.) 
+ +There are currently 4 options for framework route criteria, in the `framework_helper.py`_ file: + +- `is_flask_route_function`_, the default, looks for a ``route`` decorator +- `is_django_view_function`_, ``-a D``, looks if the first argument is named ``request`` +- `is_function_without_leading_`_, ``-a P``, looks if the function does not start with an underscore +- `is_function`_, ``-a E``, always returns True + + +.. _framework_helper.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_helper.py + +.. _is\_django\_view\_function: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_helper.py#L7 +.. _is\_flask\_route\_function: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_helper.py#L14 +.. _is\_function\_without\_leading\_: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_helper.py#L28 +.. _is\_function: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_helper.py#L23 + + +How The Code Works +================== + +`FrameworkAdaptor`_ is what `__main__.py`_ creates, it takes a framework_route_criteria that is chosen by the --adaptor cli argument. The framework_route_criteria is a function that takes an `ast.FunctionDef`_ and returns whether or not it is a route in the selected web framework. + +We mark the arguments as tainted by `looping through them`_ and making them node type `TaintedNode`_, where we then `add them to the list of sources`_. + + +.. _FrameworkAdaptor: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_adaptor.py#L14 +.. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py#L71-L85 +.. _ast.FunctionDef: http://greentreesnakes.readthedocs.io/en/latest/nodes.html#FunctionDef + +.. 
_looping through them: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_adaptor.py#L54 +.. _TaintedNode: https://github.com/python-security/pyt/blob/re_organize_code/pyt/core/node_types.py#L178 +.. _add them to the list of sources: https://github.com/python-security/pyt/blob/re_organize_code/pyt/vulnerabilities/vulnerabilities.py#L51 + +Caveats +======= + +This currently is not smart enough to understand `class-based views`_, so you will have to use ``-a P`` to mark most functions arguments as tainted, and trim false-positives yourself, this is easier with the ``--baseline`` and ``--json`` options. + +.. _class-based views: http://flask.pocoo.org/docs/1.0/views/ From ad77e9e81f4527d9c86008e7dc4a63eb1a693f6b Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 17 Jun 2018 15:40:15 -0700 Subject: [PATCH 154/291] Update README.rst --- pyt/web_frameworks/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/web_frameworks/README.rst b/pyt/web_frameworks/README.rst index 7574aced..f7292756 100644 --- a/pyt/web_frameworks/README.rst +++ b/pyt/web_frameworks/README.rst @@ -29,7 +29,7 @@ There are currently 4 options for framework route criteria, in the `framework_he .. _is\_function: https://github.com/python-security/pyt/blob/re_organize_code/pyt/web_frameworks/framework_helper.py#L23 -How The Code Works +How the Code Works ================== `FrameworkAdaptor`_ is what `__main__.py`_ creates, it takes a framework_route_criteria that is chosen by the --adaptor cli argument. The framework_route_criteria is a function that takes an `ast.FunctionDef`_ and returns whether or not it is a route in the selected web framework. 
From db087464c08fd089c99b66587269a1ea59356aaa Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 17 Jun 2018 15:48:17 -0700 Subject: [PATCH 155/291] Changed a lot of links to point to master branch --- pyt/README.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyt/README.rst b/pyt/README.rst index 7d69b9ce..c6ab4c32 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -3,7 +3,7 @@ How It Works `__main__.py`_ is where all the high-level steps happen. -.. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/__main__.py +.. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/master/pyt/__main__.py Step 1 Parse command line arguments. @@ -11,7 +11,7 @@ Step 1 `parse_args`_ in `usage.py`_ .. _parse_args: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L113 - .. _usage.py: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py + .. _usage.py: https://github.com/python-security/pyt/blob/master/pyt/usage.py Step 2 @@ -35,22 +35,22 @@ Step 2 Step 3 Pass the AST to create a `Control Flow Graph (CFG)`_ - .. _Control Flow Graph (CFG): https://github.com/python-security/pyt/tree/re_organize_code/pyt/cfg + .. _Control Flow Graph (CFG): https://github.com/python-security/pyt/tree/master/pyt/cfg Step 4 Pass the CFG to a `Framework Adaptor`_, which will mark the arguments of certain functions as tainted sources. - .. _Framework Adaptor: https://github.com/python-security/pyt/tree/re_organize_code/pyt/web_frameworks + .. _Framework Adaptor: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks Step 5 Perform `(modified-)reaching definitions analysis`_, to know where definitions reach. - .. _\(modified\-\)reaching definitions analysis: https://github.com/python-security/pyt/tree/re_organize_code/pyt/analysis + .. 
_\(modified\-\)reaching definitions analysis: https://github.com/python-security/pyt/tree/master/pyt/analysis Step 6 `Find vulnerabilities`_, by seeing where sources reach, and how. - .. _Find vulnerabilities: https://github.com/python-security/pyt/tree/re_organize_code/pyt/vulnerabilities + .. _Find vulnerabilities: https://github.com/python-security/pyt/tree/master/pyt/vulnerabilities Step 7 `Remove already known vulnerabilities`_ if a `baseline`_ (JSON file of a previous run of PyT) is provided. @@ -61,7 +61,7 @@ Step 7 Step 8 Output the results in either `text or JSON form`_, to stdout or the `output file`_. - .. _text or JSON form: https://github.com/python-security/pyt/tree/re_organize_code/pyt/formatters + .. _text or JSON form: https://github.com/python-security/pyt/tree/master/pyt/formatters .. _output file: https://github.com/python-security/pyt/blob/re_organize_code/pyt/usage.py#L80 Here is an image from the `original thesis`_: From 01397dd8c76907731c6f23e5d7baa5493a721973 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 17 Jun 2018 15:49:56 -0700 Subject: [PATCH 156/291] Update README.rst --- pyt/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/README.rst b/pyt/README.rst index c6ab4c32..118a5080 100644 --- a/pyt/README.rst +++ b/pyt/README.rst @@ -45,7 +45,7 @@ Step 4 Step 5 Perform `(modified-)reaching definitions analysis`_, to know where definitions reach. - .. _\(modified\-\)reaching definitions analysis: https://github.com/python-security/pyt/tree/master/pyt/analysis + .. _\(modified\-\)reaching definitions analysis: https://github.com/python-security/pyt/tree/master/pyt/analysis#where-do-definitions-reach Step 6 `Find vulnerabilities`_, by seeing where sources reach, and how. From 5546c3dc74a24ada1e9f5ea8a63779e2fd538ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Tue, 19 Jun 2018 13:43:15 +0300 Subject: [PATCH 157/291] changed func. 
and added baseline --- pyt/__main__.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index ad0cce7b..643eeb19 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -48,7 +48,7 @@ def discover_files(targets, excluded_files, recursive=False): return included_files -def main(command_line_args=sys.argv[1:]): # noqa: C901 +def main(command_line_args=sys.argv[1:]): args = parse_args(command_line_args) ui_mode = UImode.NORMAL @@ -91,6 +91,8 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 path ) cfg_list = [cfg] + + framework_route_criteria = is_flask_route_function if args.adaptor: if args.adaptor.lower().startswith('e'): @@ -109,7 +111,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 initialize_constraint_table(cfg_list) analyse(cfg_list) - vulnerabilities.append(find_vulnerabilities( + vulnerabilities.extend(find_vulnerabilities( cfg_list, ui_mode, args.blackbox_mapping_file, @@ -117,6 +119,12 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 nosec_lines )) + if args.baseline: + vulnerabilities = get_vulnerabilities_not_in_baseline( + vulnerabilities, + args.baseline + ) + if args.json: json.report(vulnerabilities, args.output_file) From 8d1d80569d0c5f2af8eeae1b88cc8a9ef4b26043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Tue, 19 Jun 2018 13:47:51 +0300 Subject: [PATCH 158/291] new parameters for discover_files --- tests/main_test.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index eea6ff47..aee80c68 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -5,17 +5,18 @@ class MainTest(BaseTestCase): + @mock.patch('pyt.__main__.discover_files') @mock.patch('pyt.__main__.parse_args') @mock.patch('pyt.__main__.find_vulnerabilities') @mock.patch('pyt.__main__.text') - def test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args): + def 
test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args, mock_discover_files): mock_find_vulnerabilities.return_value = 'stuff' example_file = 'examples/vulnerable_code/inter_command_injection.py' output_file = 'mocked_outfile' + mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( autospec=True, - filepath=example_file, project_root=None, baseline=None, json=None, @@ -32,17 +33,18 @@ def test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args mock_parse_args.return_value.output_file ) + @mock.patch('pyt.__main__.discover_files') @mock.patch('pyt.__main__.parse_args') @mock.patch('pyt.__main__.find_vulnerabilities') @mock.patch('pyt.__main__.json') - def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args): + def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args, mock_discover_files): mock_find_vulnerabilities.return_value = 'stuff' example_file = 'examples/vulnerable_code/inter_command_injection.py' output_file = 'mocked_outfile' + mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( autospec=True, - filepath=example_file, project_root=None, baseline=None, json=True, From 35b800195882f503e4030c2267b1422059bfd82b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 20 Jun 2018 03:18:36 +0300 Subject: [PATCH 159/291] test_valid_args_but_no_targets() --- tests/usage_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/usage_test.py b/tests/usage_test.py index 4883c31d..d923e5ac 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -65,18 +65,19 @@ def test_no_args(self): self.assertEqual(stdout.getvalue(), EXPECTED) - '''def test_valid_args_but_no_filepath(self): + def test_valid_args_but_no_targets(self): with self.assertRaises(SystemExit): with capture_sys_output() as (_, stderr): parse_args(['-j']) - EXPECTED = """usage: 
python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] + EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-trim] [-i] -python -m pyt: error: The -f/--filepath argument is required\n""" + [-r] [-x EXCLUDED_PATHS] [-trim] [-i] + [targets [targets ...]] +python -m pyt: error: The targets argument is required\n""" - self.assertEqual(stderr.getvalue(), EXPECTED)''' + self.assertEqual(stderr.getvalue(), EXPECTED) # def test_using_both_mutually_exclusive_args(self): # with self.assertRaises(SystemExit): From 2e4d07a0c3110be77c6caf6bf40e7667d85355d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 20 Jun 2018 03:37:00 +0300 Subject: [PATCH 160/291] edited expected values --- tests/usage_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/usage_test.py b/tests/usage_test.py index d923e5ac..d9ed7cec 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -29,7 +29,7 @@ def test_no_args(self): [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - [targets [targets ...]] + targets [targets ...] required arguments: targets source file(s) or directory(s) to be tested @@ -74,8 +74,8 @@ def test_valid_args_but_no_targets(self): [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - [targets [targets ...]] -python -m pyt: error: The targets argument is required\n""" + targets [targets ...] 
+python -m pyt: error: the following arguments are required: targets\n""" self.assertEqual(stderr.getvalue(), EXPECTED) From 0c6b08296e16a33ccc429966faedca2fdaf829dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 20 Jun 2018 15:17:59 +0300 Subject: [PATCH 161/291] changed vulnerabilities list location --- pyt/__main__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 643eeb19..aee51be6 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -33,18 +33,20 @@ def discover_files(targets, excluded_files, recursive=False): included_files = list() excluded_list = excluded_files.split(",") - + + for target in targets: if os.path.isdir(target): - if recursive: for root, dirs, files in os.walk(target): for f in files: + if not recursive: + break fullpath = os.path.join(root, f) if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: included_files.append(fullpath) else: if target not in excluded_list: - included_files.append(targets[0]) + included_files.append(target) return included_files @@ -62,9 +64,8 @@ def main(command_line_args=sys.argv[1:]): args.excluded_paths, args.recursive ) - vulnerabilities = list() for path in files: - print(path) + vulnerabilities = list() if args.ignore_nosec: nosec_lines = set() else: From ae84a442925f976ffca6416ac9619e799e7fb86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 20 Jun 2018 15:26:12 +0300 Subject: [PATCH 162/291] Update usage_test.py --- tests/usage_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/usage_test.py b/tests/usage_test.py index d9ed7cec..44c60166 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -74,7 +74,7 @@ def test_valid_args_but_no_targets(self): [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - targets 
[targets ...] + [targets [targets ...]] python -m pyt: error: the following arguments are required: targets\n""" self.assertEqual(stderr.getvalue(), EXPECTED) From 1944b4ade9a08d34f83fb42037f9af63327f0ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 20 Jun 2018 15:29:30 +0300 Subject: [PATCH 163/291] Update usage_test.py --- tests/usage_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/usage_test.py b/tests/usage_test.py index 44c60166..d9ed7cec 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -74,7 +74,7 @@ def test_valid_args_but_no_targets(self): [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - [targets [targets ...]] + targets [targets ...] python -m pyt: error: the following arguments are required: targets\n""" self.assertEqual(stderr.getvalue(), EXPECTED) From ba3d4383ecafce755fdb3ec785430f1ebd4c5577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Thu, 21 Jun 2018 15:42:53 +0300 Subject: [PATCH 164/291] changed location of "recursive control" --- pyt/__main__.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index aee51be6..1e8c2640 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -33,17 +33,15 @@ def discover_files(targets, excluded_files, recursive=False): included_files = list() excluded_list = excluded_files.split(",") - - for target in targets: if os.path.isdir(target): for root, dirs, files in os.walk(target): for f in files: - if not recursive: - break fullpath = os.path.join(root, f) if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: included_files.append(fullpath) + if not recursive: + break else: if target not in excluded_list: included_files.append(target) @@ -64,6 +62,7 @@ def main(command_line_args=sys.argv[1:]): args.excluded_paths, 
args.recursive ) + for path in files: vulnerabilities = list() if args.ignore_nosec: @@ -121,10 +120,10 @@ def main(command_line_args=sys.argv[1:]): )) if args.baseline: - vulnerabilities = get_vulnerabilities_not_in_baseline( - vulnerabilities, - args.baseline - ) + vulnerabilities = get_vulnerabilities_not_in_baseline( + vulnerabilities, + args.baseline + ) if args.json: From 6a25e25ce0e01dd434b320d48106f0127b2adb8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 22 Jun 2018 22:54:53 +0300 Subject: [PATCH 165/291] Update usage.py --- pyt/usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/usage.py b/pyt/usage.py index 0892536e..30286215 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -30,7 +30,7 @@ def valid_date(s): def _add_required_group(parser): required_group = parser.add_argument_group('required arguments') required_group.add_argument( - 'targets', metavar='targets', type=str, nargs='*', + 'targets', metavar='targets', type=str, nargs='+', help='source file(s) or directory(s) to be tested' ) From f42d283bb7195165d82e1994e5965d2ee547e666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 22 Jun 2018 22:56:05 +0300 Subject: [PATCH 166/291] de-dent some lines --- pyt/__main__.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 1e8c2640..062fea8b 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -35,13 +35,13 @@ def discover_files(targets, excluded_files, recursive=False): excluded_list = excluded_files.split(",") for target in targets: if os.path.isdir(target): - for root, dirs, files in os.walk(target): - for f in files: - fullpath = os.path.join(root, f) - if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: - included_files.append(fullpath) - if not recursive: - break + for root, dirs, files in os.walk(target): + for f in files: + fullpath = os.path.join(root, 
f) + if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: + included_files.append(fullpath) + if not recursive: + break else: if target not in excluded_list: included_files.append(target) @@ -62,7 +62,7 @@ def main(command_line_args=sys.argv[1:]): args.excluded_paths, args.recursive ) - + for path in files: vulnerabilities = list() if args.ignore_nosec: @@ -125,7 +125,6 @@ def main(command_line_args=sys.argv[1:]): args.baseline ) - if args.json: json.report(vulnerabilities, args.output_file) else: From c7b2f73d9817a6363e8ac4fe0862cefebd8a7ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 22 Jun 2018 22:57:38 +0300 Subject: [PATCH 167/291] test_no_args --- tests/usage_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/usage_test.py b/tests/usage_test.py index d9ed7cec..ff8459e6 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -29,7 +29,7 @@ def test_no_args(self): [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - targets [targets ...] + [targets [targets ...]] required arguments: targets source file(s) or directory(s) to be tested From 2afc177d0eea61767f11f8daa4f3f2faf13d7e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Fri, 22 Jun 2018 22:59:52 +0300 Subject: [PATCH 168/291] test_no_args passed --- tests/usage_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/usage_test.py b/tests/usage_test.py index ff8459e6..d9ed7cec 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -29,7 +29,7 @@ def test_no_args(self): [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - [targets [targets ...]] + targets [targets ...] 
required arguments: targets source file(s) or directory(s) to be tested From dfb5c0d64157ff502854abe40a4e191d01306673 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Fri, 22 Jun 2018 17:35:43 -0700 Subject: [PATCH 169/291] Add noqa: C901 back to def main --- pyt/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 062fea8b..90ee0e91 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -48,7 +48,7 @@ def discover_files(targets, excluded_files, recursive=False): return included_files -def main(command_line_args=sys.argv[1:]): +def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) ui_mode = UImode.NORMAL From cf393c9f4f32714d58fbca403a4a28bccab60b5e Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 22 Jun 2018 20:39:08 -0700 Subject: [PATCH 170/291] Fix Travis after #129 merge, add mock to requirements-dev and unpin all versions, add travis commands to tox so this does not happen again --- pyt/__main__.py | 3 +-- requirements-dev.txt | 15 ++++++++------- tox.ini | 2 ++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 90ee0e91..726cdf05 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -62,7 +62,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 args.excluded_paths, args.recursive ) - + for path in files: vulnerabilities = list() if args.ignore_nosec: @@ -92,7 +92,6 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 ) cfg_list = [cfg] - framework_route_criteria = is_flask_route_function if args.adaptor: if args.adaptor.lower().startswith('e'): diff --git a/requirements-dev.txt b/requirements-dev.txt index 339d4dcc..54e84f37 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,8 @@ -flake8==3.5.0 -pre-commit==0.16.3 -py==1.5.2 -pycodestyle==2.3.1 -pyflakes==1.5.0 -tox==2.9.1 -virtualenv==15.1.0 +flake8 +mock +pre-commit +py +pycodestyle +pyflakes +tox +virtualenv diff --git 
a/tox.ini b/tox.ini index f85b156c..1199b32f 100644 --- a/tox.ini +++ b/tox.ini @@ -10,3 +10,5 @@ commands = coverage report --include=tests/* --fail-under 100 coverage report --include=pyt/* --fail-under 88 pre-commit run + flake8 . --count --exclude=examples,venv,.tox --select=E901,E999,F821,F822,F823 --show-source --statistics + flake8 . --count --exclude=examples,venv,.tox,dist --exit-zero --max-complexity=10 --max-line-length=127 --statistics From 61ce4751531b01e968698aa537d58b68eb606f01 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sun, 24 Jun 2018 14:37:41 -0700 Subject: [PATCH 171/291] Cleaned up pyt/core/ directory, mostly added _ to private functions --- pyt/core/ast_helper.py | 24 ++++++++-------- pyt/core/module_definitions.py | 8 +++++- pyt/core/project_handler.py | 46 ++++++------------------------ tests/cfg/import_test.py | 6 ++-- tests/cfg/nested_functions_test.py | 6 ++-- tests/core/project_handler_test.py | 9 +++--- tests/test_utils.py | 31 ++++++++++++++++++++ 7 files changed, 68 insertions(+), 62 deletions(-) create mode 100644 tests/test_utils.py diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index 17013128..33ef5844 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -10,7 +10,7 @@ recursive = False -def convert_to_3(path): # pragma: no cover +def _convert_to_3(path): # pragma: no cover """Convert python 2 file to python 3.""" try: print('##### Trying to convert file to Python 3. #####') @@ -34,7 +34,7 @@ def generate_ast(path): except SyntaxError: # pragma: no cover global recursive if not recursive: - convert_to_3(path) + _convert_to_3(path) recursive = True return generate_ast(path) else: @@ -44,12 +44,7 @@ def generate_ast(path): raise IOError('Input needs to be a file. 
Path: ' + path) -def list_to_dotted_string(list_of_components): - """Convert a list to a string seperated by a dot.""" - return '.'.join(list_of_components) - - -def get_call_names_helper(node, result): +def _get_call_names_helper(node, result): """Recursively finds all function names.""" if isinstance(node, ast.Name): if node.id not in BLACK_LISTED_CALL_NAMES: @@ -58,24 +53,29 @@ def get_call_names_helper(node, result): elif isinstance(node, ast.Call): return result elif isinstance(node, ast.Subscript): - return get_call_names_helper(node.value, result) + return _get_call_names_helper(node.value, result) elif isinstance(node, ast.Str): result.append(node.s) return result else: result.append(node.attr) - return get_call_names_helper(node.value, result) + return _get_call_names_helper(node.value, result) + + +def _list_to_dotted_string(list_of_components): + """Convert a list to a string seperated by a dot.""" + return '.'.join(list_of_components) def get_call_names_as_string(node): """Get a list of call names as a string.""" - return list_to_dotted_string(get_call_names(node)) + return _list_to_dotted_string(get_call_names(node)) def get_call_names(node): """Get a list of call names.""" result = list() - return reversed(get_call_names_helper(node, result)) + return reversed(_get_call_names_helper(node, result)) class Arguments(): diff --git a/pyt/core/module_definitions.py b/pyt/core/module_definitions.py index 6ec197de..767917c8 100644 --- a/pyt/core/module_definitions.py +++ b/pyt/core/module_definitions.py @@ -56,7 +56,13 @@ class ModuleDefinitions(): Adds to the project definitions list. """ - def __init__(self, import_names=None, module_name=None, is_init=False, filename=None): + def __init__( + self, + import_names=None, + module_name=None, + is_init=False, + filename=None + ): """Optionally set import names and module name. Module name should only be set when it is a normal import statement. 
diff --git a/pyt/core/project_handler.py b/pyt/core/project_handler.py index 4a16ff96..48eccfd1 100644 --- a/pyt/core/project_handler.py +++ b/pyt/core/project_handler.py @@ -5,30 +5,30 @@ import os -local_modules = list() +_local_modules = list() def get_directory_modules(directory): """Return a list containing tuples of e.g. ('__init__', 'example/import_test_project/__init__.py') """ - if local_modules and os.path.dirname(local_modules[0][1]) == directory: - return local_modules + if _local_modules and os.path.dirname(_local_modules[0][1]) == directory: + return _local_modules if not os.path.isdir(directory): # example/import_test_project/A.py -> example/import_test_project directory = os.path.dirname(directory) if directory == '': - return local_modules + return _local_modules for path in os.listdir(directory): - if is_python_file(path): + if _is_python_file(path): # A.py -> A module_name = os.path.splitext(path)[0] - local_modules.append((module_name, os.path.join(directory, path))) + _local_modules.append((module_name, os.path.join(directory, path))) - return local_modules + return _local_modules def get_modules(path): @@ -39,7 +39,7 @@ def get_modules(path): modules = list() for root, directories, filenames in os.walk(path): for filename in filenames: - if is_python_file(filename): + if _is_python_file(filename): directory = os.path.dirname( os.path.realpath( os.path.join( @@ -64,35 +64,7 @@ def get_modules(path): return modules -def get_modules_and_packages(path): - """Return a list containing tuples of - e.g. 
('folder', 'example/test_project/folder', '.folder') - ('test_project.utils', 'example/test_project/utils.py') - """ - module_root = os.path.split(path)[1] - modules = list() - for root, directories, filenames in os.walk(path): - for directory in directories: - if directory != '__pycache__': - full_path = os.path.join(root, directory) - relative_path = os.path.realpath(full_path).split(module_root)[-1].replace(os.sep, '.') - # Remove the dot in front to be consistent - modules.append((relative_path[1:], full_path, relative_path)) - - for filename in filenames: - if is_python_file(filename): - full_path = os.path.join(root, filename) - directory = os.path.dirname(os.path.realpath(full_path)).split(module_root)[-1].replace(os.sep, '.') - directory = directory.replace('.', '', 1) - if directory: - modules.append(('.'.join((module_root, directory, filename.replace('.py', ''))), full_path)) - else: - modules.append(('.'.join((module_root, filename.replace('.py', ''))), full_path)) - - return modules - - -def is_python_file(path): +def _is_python_file(path): if os.path.splitext(path)[1] == '.py': return True return False diff --git a/tests/cfg/import_test.py b/tests/cfg/import_test.py index 842fafa5..baa4d6e1 100644 --- a/tests/cfg/import_test.py +++ b/tests/cfg/import_test.py @@ -2,12 +2,10 @@ import os from ..base_test_case import BaseTestCase +from ..test_utils import get_modules_and_packages from pyt.core.ast_helper import get_call_names_as_string -from pyt.core.project_handler import ( - get_directory_modules, - get_modules_and_packages -) +from pyt.core.project_handler import get_directory_modules class ImportTest(BaseTestCase): diff --git a/tests/cfg/nested_functions_test.py b/tests/cfg/nested_functions_test.py index 7a5f5772..5c9599d9 100644 --- a/tests/cfg/nested_functions_test.py +++ b/tests/cfg/nested_functions_test.py @@ -1,11 +1,9 @@ import os.path from ..base_test_case import BaseTestCase +from ..test_utils import get_modules_and_packages -from 
pyt.core.project_handler import ( - get_directory_modules, - get_modules_and_packages -) +from pyt.core.project_handler import get_directory_modules class NestedTest(BaseTestCase): diff --git a/tests/core/project_handler_test.py b/tests/core/project_handler_test.py index b6657cd5..4de34b21 100644 --- a/tests/core/project_handler_test.py +++ b/tests/core/project_handler_test.py @@ -1,10 +1,11 @@ import os import unittest +from ..test_utils import get_modules_and_packages + from pyt.core.project_handler import ( get_modules, - get_modules_and_packages, - is_python_file + _is_python_file ) @@ -15,8 +16,8 @@ def test_is_python_file(self): python_module = './project_handler_test.py' not_python_module = '../.travis.yml' - self.assertEqual(is_python_file(python_module), True) - self.assertEqual(is_python_file(not_python_module), False) + self.assertEqual(_is_python_file(python_module), True) + self.assertEqual(_is_python_file(not_python_module), False) def test_get_modules(self): project_folder = os.path.normpath(os.path.join('examples', 'test_project')) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..1d882e33 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,31 @@ +import os + +from pyt.core.project_handler import _is_python_file + + +def get_modules_and_packages(path): + """Return a list containing tuples of + e.g. 
('folder', 'example/test_project/folder', '.folder') + ('test_project.utils', 'example/test_project/utils.py') + """ + module_root = os.path.split(path)[1] + modules = list() + for root, directories, filenames in os.walk(path): + for directory in directories: + if directory != '__pycache__': + full_path = os.path.join(root, directory) + relative_path = os.path.realpath(full_path).split(module_root)[-1].replace(os.sep, '.') + # Remove the dot in front to be consistent + modules.append((relative_path[1:], full_path, relative_path)) + + for filename in filenames: + if _is_python_file(filename): + full_path = os.path.join(root, filename) + directory = os.path.dirname(os.path.realpath(full_path)).split(module_root)[-1].replace(os.sep, '.') + directory = directory.replace('.', '', 1) + if directory: + modules.append(('.'.join((module_root, directory, filename.replace('.py', ''))), full_path)) + else: + modules.append(('.'.join((module_root, filename.replace('.py', ''))), full_path)) + + return modules From 2df9873f2a078bac5cfc5835873b75ea27f345fc Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sun, 24 Jun 2018 14:40:28 -0700 Subject: [PATCH 172/291] Change test coverage from 88% to 87% (moved a function to tests/) in last commit --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 1199b32f..4d5f66e8 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ commands = coverage erase coverage run tests coverage report --include=tests/* --fail-under 100 - coverage report --include=pyt/* --fail-under 88 + coverage report --include=pyt/* --fail-under 87 pre-commit run flake8 . --count --exclude=examples,venv,.tox --select=E901,E999,F821,F822,F823 --show-source --statistics flake8 . 
--count --exclude=examples,venv,.tox,dist --exit-zero --max-complexity=10 --max-line-length=127 --statistics From b07035f2812817ed8340303cc8df9aeee3168489 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sun, 24 Jun 2018 15:19:42 -0700 Subject: [PATCH 173/291] Move a function around, edit docstring of Arguments --- pyt/core/ast_helper.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index 33ef5844..dc4f8195 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -62,6 +62,12 @@ def _get_call_names_helper(node, result): return _get_call_names_helper(node.value, result) +def get_call_names(node): + """Get a list of call names.""" + result = list() + return reversed(_get_call_names_helper(node, result)) + + def _list_to_dotted_string(list_of_components): """Convert a list to a string seperated by a dot.""" return '.'.join(list_of_components) @@ -72,17 +78,11 @@ def get_call_names_as_string(node): return _list_to_dotted_string(get_call_names(node)) -def get_call_names(node): - """Get a list of call names.""" - result = list() - return reversed(_get_call_names_helper(node, result)) - - class Arguments(): """Represents arguments of a function.""" def __init__(self, args): - """Create an Argument container class. + """Argument container class. Args: args(list(ast.args): The arguments in a function AST node. From 4de0bc53d2fa6b77dea3900642126ff3bd05846f Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 24 Jun 2018 15:26:03 -0700 Subject: [PATCH 174/291] Update README.rst --- pyt/core/README.rst | 46 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/pyt/core/README.rst b/pyt/core/README.rst index 3ba5b13c..11b8090f 100644 --- a/pyt/core/README.rst +++ b/pyt/core/README.rst @@ -1 +1,45 @@ -Coming soon. +This directory contains miscellaneous code that is imported from different parts of the codebase. 
+ + +`ast_helper.py`_ contains + + + +- `generate_ast`_ to read any file and generate an AST from it, this is called from `__main__.py`_ and `stmt_visitor.py`_ when importing a module. + +- `get_call_names`_ used in `vars_visitor.py`_ when visiting a Subscript, and `framework_helper.py`_ on function decorators in `is_flask_route_function`_ + +- `get_call_names_as_string`_ used in `expr_visitor.py`_ to create ret_function_name as RHS and yield_function_name as LHS, and in stmt_visitor.py when connecting a function to a loop. + +- `Arguments`_ used in `expr_visitor.py`_ when processing the arguments of a user defined function and `framework_adaptor.py`_ to taint function definition arguments. + + +.. _ast\_helper.py: https://github.com/python-security/pyt/blob/master/pyt/core/ast_helper.py +.. _generate\_ast: https://github.com/python-security/pyt/blob/61ce4751531b01e968698aa537d58b68eb606f01/pyt/core/ast_helper.py#L24-L44 + +.. _get\_call\_names\_as\_string: https://github.com/python-security/pyt/blob/61ce4751531b01e968698aa537d58b68eb606f01/pyt/core/ast_helper.py#L70-L72 +.. _get\_call\_names: https://github.com/python-security/pyt/blob/61ce4751531b01e968698aa537d58b68eb606f01/pyt/core/ast_helper.py#L75-L75 + + + + +`module_definitions.py`_ contains TODO + +`node_types.py`_ contains all the different node types created in `expr_visitor.py`_ and `stmt_visitor.py`_ + +`project_handler.py`_ contains TODO + +.. _module_definitions.py: https://github.com/python-security/pyt/blob/master/pyt/core/module_definitions.py + +.. _node_types.py: https://github.com/python-security/pyt/blob/master/pyt/core/node_types.py + +.. _project_handler.py: https://github.com/python-security/pyt/blob/master/pyt/core/project_handler.py + + +.. _\_\_main\_\_.py: https://github.com/python-security/pyt/blob/master/pyt/__main__.py +.. _stmt\_visitor.py: https://github.com/python-security/pyt/blob/master/pyt/cfg/stmt_visitor.py +.. 
_expr\_visitor.py: https://github.com/python-security/pyt/blob/master/pyt/cfg/expr_visitor.py +.. _framework\_adaptor.py: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks +.. _framework\_helper.py: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks +.. _is\_flask\_route_function: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks +.. _vars\_visitor.py: https://github.com/python-security/pyt/tree/master/pyt/helper_visitors From 5305417e01f342cd670519acad354161f692e5c4 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 24 Jun 2018 15:30:09 -0700 Subject: [PATCH 175/291] Update README.rst --- pyt/core/README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyt/core/README.rst b/pyt/core/README.rst index 11b8090f..6c616ba2 100644 --- a/pyt/core/README.rst +++ b/pyt/core/README.rst @@ -17,9 +17,9 @@ This directory contains miscellaneous code that is imported from different parts .. _ast\_helper.py: https://github.com/python-security/pyt/blob/master/pyt/core/ast_helper.py .. _generate\_ast: https://github.com/python-security/pyt/blob/61ce4751531b01e968698aa537d58b68eb606f01/pyt/core/ast_helper.py#L24-L44 -.. _get\_call\_names\_as\_string: https://github.com/python-security/pyt/blob/61ce4751531b01e968698aa537d58b68eb606f01/pyt/core/ast_helper.py#L70-L72 -.. _get\_call\_names: https://github.com/python-security/pyt/blob/61ce4751531b01e968698aa537d58b68eb606f01/pyt/core/ast_helper.py#L75-L75 - +.. _get\_call\_names: https://github.com/python-security/pyt/blob/b07035f2812817ed8340303cc8df9aeee3168489/pyt/core/ast_helper.py#L65-L68 +.. _get\_call\_names\_as\_string: https://github.com/python-security/pyt/blob/b07035f2812817ed8340303cc8df9aeee3168489/pyt/core/ast_helper.py#L76-L79 +.. 
_Arguments: https://github.com/python-security/pyt/blob/b07035f2812817ed8340303cc8df9aeee3168489/pyt/core/ast_helper.py#L81-L111 From c884bbfc8521741331e75fec0737001733c473e5 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 24 Jun 2018 15:55:07 -0700 Subject: [PATCH 176/291] Update README.rst --- pyt/core/README.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pyt/core/README.rst b/pyt/core/README.rst index 6c616ba2..becfa660 100644 --- a/pyt/core/README.rst +++ b/pyt/core/README.rst @@ -23,7 +23,18 @@ This directory contains miscellaneous code that is imported from different parts -`module_definitions.py`_ contains TODO +`module_definitions.py`_ contains classes created mostly in `stmt_visitor.py`_ + +- `project_definitions`_ is a global dictionary modifed in the `append_if_local_or_in_imports`_ method of `ModuleDefinitions`_, read in `framework_adaptor.py`_ to `obtain all function nodes`_. + +- `ModuleDefinition`_ is created to keep track of parent definitions when visiting functions, classes and __init__.py files in `stmt_visitor.py`_ + +- `LocalModuleDefinition`_ is created when visiting functions and classes in `stmt_visitor.py`_ + +- `ModuleDefinitions`_ contains `append_if_local_or_in_imports`_ which is used in when adding a function or class to the module definitions in + + +.. 
_obtain all function nodes: https://github.com/python-security/pyt/blob/02461063688fe02226e627c00adfb2c707d89aa0/pyt/web_frameworks/framework_adaptor.py#L93 `node_types.py`_ contains all the different node types created in `expr_visitor.py`_ and `stmt_visitor.py`_ From f60a51fcc8dc797e94205e2738550a5833bda0b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 27 Jun 2018 23:32:28 +0300 Subject: [PATCH 177/291] added tests for discover_files() --- tests/main_test.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tests/main_test.py b/tests/main_test.py index aee80c68..2f7fe5ae 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,7 +1,7 @@ import mock from .base_test_case import BaseTestCase -from pyt.__main__ import main +from pyt.__main__ import main,discover_files class MainTest(BaseTestCase): @@ -60,3 +60,33 @@ def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args mock_find_vulnerabilities.return_value, mock_parse_args.return_value.output_file ) + + def test_targets_with_no_excluded(self): + targets = ["examples/vulnerable_code/inter_command_injection.py"] + excluded_files = "" + + included_files = discover_files(targets,excluded_files) + expected = ["examples/vulnerable_code/inter_command_injection.py"] + self.assertListEqual(included_files,expected) + + def test_targets_with_exluded(self): + targets = ["examples/vulnerable_code/inter_command_injection.py"] + excluded_files = "examples/vulnerable_code/inter_command_injection.py" + + included_files = discover_files(targets,excluded_files) + expected = [] + self.assertListEqual(included_files,expected) + + def test_targets_with_recursive(self): + targets = ["examples/vulnerable_code/"] + excluded_files = "" + + included_files = discover_files(targets,excluded_files,True) + self.assertEqual(len(included_files),30) + + def test_targets_with_recursive_and_excluded(self): + targets = 
["examples/vulnerable_code/"] + excluded_files = "inter_command_injection.py" + + included_files = discover_files(targets,excluded_files,True) + self.assertEqual(len(included_files),29) From 7ff6165f5a6e09c36491de002970d45aafe84beb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Wed, 27 Jun 2018 23:43:28 +0300 Subject: [PATCH 178/291] added new usage output --- README.rst | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index af180b63..0870edc0 100644 --- a/README.rst +++ b/README.rst @@ -63,44 +63,48 @@ Usage .. code-block:: - usage: python -m pyt [-h] [-f FILEPATH] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [-trim] [-i] + usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] + [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] + [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] + [-r] [-x EXCLUDED_PATHS] [-trim] [-i] + targets [targets ...] required arguments: - -f FILEPATH, --filepath FILEPATH - Path to the file that should be analysed. + targets source file(s) or directory(s) to be tested optional arguments: -a ADAPTOR, --adaptor ADAPTOR - Choose a web framework adaptor: Flask(Default), - Django, Every or Pylons + Choose a web framework adaptor: Flask(Default), + Django, Every or Pylons -pr PROJECT_ROOT, --project-root PROJECT_ROOT - Add project root, only important when the entry file - is not at the root of the project. + Add project root, only important when the entry file + is not at the root of the project. -b BASELINE_JSON_FILE, --baseline BASELINE_JSON_FILE - Path of a baseline report to compare against (only - JSON-formatted files are accepted) + Path of a baseline report to compare against (only + JSON-formatted files are accepted) -j, --json Prints JSON instead of report. 
-m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE - Input blackbox mapping file. + Input blackbox mapping file. -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE - Input file with a list of sources and sinks + Input file with a list of sources and sinks -o OUTPUT_FILE, --output OUTPUT_FILE - write report to filename + write report to filename --ignore-nosec do not skip lines with # nosec comments + -r, --recursive find and process files in subdirectories + -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS + Separate files with commas print arguments: -trim, --trim-reassigned-in - Trims the reassigned list to just the vulnerability - chain. + Trims the reassigned list to just the vulnerability + chain. -i, --interactive Will ask you about each blackbox function call in - vulnerability chains. + vulnerability chains. Usage from Source ================= -Using it like a user ``python3 -m pyt -f example/vulnerable_code/XSS_call.py save -du`` +Using it like a user ``python3 -m pyt examples/vulnerable_code/XSS_call.py`` Running the tests ``python3 -m tests`` From 3dab96a8cd27a40a474de13d75e331d7bd604f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sat, 30 Jun 2018 16:25:17 +0300 Subject: [PATCH 179/291] added new things --- CHANGELOG.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 564342ec..3d8c48bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,10 +28,13 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: #### :tada: New Features * Baseline support ([#106], thanks [@omergunal]) +* Whitelist lines ending in # nosec ([#121], [@omergunal]) +* Recursive option ([#129], [@omergunal]) -[#106]: https://github.com/python-security/pyt/pull/106 [@omergunal]: https://github.com/omergunal - +[#106]: https://github.com/python-security/pyt/pull/106 +[#129]: https://github.com/python-security/pyt/pull/129 +[#121]: https://github.com/python-security/pyt/pull/121 #### :sparkles: Usability * Combined all source/sink information files and made it the default ([#116]) @@ -43,11 +46,15 @@ If you love PyT, please star our project on GitHub to show your support! :star: * Fixed a bug where `visit_Raise` raised a `TypeError` ([#117], thanks [@lFatty]) * Fixed an infinite loop bug that was caused while handling certain loops ([#118]) * Fixed a bug where we were not including `pyt/vulnerability_definitions` files ([#122], thanks [@Ekultek]) +* Fixed flake8 errors ([#130]) #### :snake: Miscellaneous * Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) ([#110], [#111]) +* Re organize code ([#126]) +* Cleaned up pyt/core/ ([#132]) +[#126]: https://github.com/python-security/pyt/pull/126 [#116]: https://github.com/python-security/pyt/pull/116 [#115]: https://github.com/python-security/pyt/pull/115 [#119]: https://github.com/python-security/pyt/pull/119 @@ -58,3 +65,5 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: [@lfatty]: https://github.com/lfatty [#122]: https://github.com/python-security/pyt/issues/122 [@Ekultek]: https://github.com/Ekultek +[#130]: https://github.com/python-security/pyt/pull/130 +[#132]: https://github.com/python-security/pyt/pull/132 From b5fce8c984a491afb7623293f07728e64cac08ff Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 30 Jun 2018 14:16:06 -0700 Subject: [PATCH 180/291] Fix nosec lines after recursive function, make all tests pass --- pyt/__main__.py | 54 ++++++++++--------- pyt/cfg/make_cfg.py | 14 +++-- pyt/helper_visitors/vars_visitor.py | 1 + pyt/vulnerabilities/vulnerabilities.py | 23 +++++--- pyt/vulnerabilities/vulnerability_helper.py | 8 ++- .../vulnerabilities_across_files_test.py | 6 ++- tests/vulnerabilities/vulnerabilities_test.py | 18 +++++-- 7 files changed, 85 insertions(+), 39 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 726cdf05..d3f94251 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -35,10 +35,10 @@ def discover_files(targets, excluded_files, recursive=False): excluded_list = excluded_files.split(",") for target in targets: if os.path.isdir(target): - for root, dirs, files in os.walk(target): - for f in files: - fullpath = os.path.join(root, f) - if os.path.splitext(fullpath)[1] == '.py' and fullpath.split("/")[-1] not in excluded_list: + for root, _, files in os.walk(target): + for file in files: + if file.endswith('.py') and file not in excluded_list: + fullpath = os.path.join(root, file) included_files.append(fullpath) if not recursive: break @@ -48,6 +48,18 @@ def discover_files(targets, excluded_files, recursive=False): return included_files +def retrieve_nosec_lines( + path +): + file = open(path, 'r') + lines = file.readlines() + return set( + lineno for + (lineno, line) in enumerate(lines, start=1) + if '#nosec' in line or '# nosec' in line + ) + + def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) @@ -63,18 +75,11 @@ def 
main(command_line_args=sys.argv[1:]): # noqa: C901 args.recursive ) + nosec_lines = dict() + for path in files: - vulnerabilities = list() - if args.ignore_nosec: - nosec_lines = set() - else: - file = open(path, 'r') - lines = file.readlines() - nosec_lines = set( - lineno for - (lineno, line) in enumerate(lines, start=1) - if '#nosec' in line or '# nosec' in line - ) + if not args.ignore_nosec: + nosec_lines[path] = retrieve_nosec_lines(path) if args.project_root: directory = os.path.normpath(args.project_root) @@ -100,6 +105,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 framework_route_criteria = is_function_without_leading_ elif args.adaptor.lower().startswith('d'): framework_route_criteria = is_django_view_function + # Add all the route functions to the cfg_list FrameworkAdaptor( cfg_list, @@ -108,15 +114,15 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 framework_route_criteria ) - initialize_constraint_table(cfg_list) - analyse(cfg_list) - vulnerabilities.extend(find_vulnerabilities( - cfg_list, - ui_mode, - args.blackbox_mapping_file, - args.trigger_word_file, - nosec_lines - )) + initialize_constraint_table(cfg_list) + analyse(cfg_list) + vulnerabilities = find_vulnerabilities( + cfg_list, + ui_mode, + args.blackbox_mapping_file, + args.trigger_word_file, + nosec_lines + ) if args.baseline: vulnerabilities = get_vulnerabilities_not_in_baseline( diff --git a/pyt/cfg/make_cfg.py b/pyt/cfg/make_cfg.py index eaa78c9b..a60b734e 100644 --- a/pyt/cfg/make_cfg.py +++ b/pyt/cfg/make_cfg.py @@ -2,9 +2,15 @@ class CFG(): - def __init__(self, nodes, blackbox_assignments): + def __init__( + self, + nodes, + blackbox_assignments, + filename + ): self.nodes = nodes self.blackbox_assignments = blackbox_assignments + self.filename = filename def __repr__(self): output = '' @@ -29,10 +35,12 @@ def make_cfg( visitor = ExprVisitor( tree, project_modules, - local_modules, filename, + local_modules, + filename, module_definitions ) return CFG( 
visitor.nodes, - visitor.blackbox_assignments + visitor.blackbox_assignments, + filename ) diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index bda24c9b..22348528 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -126,6 +126,7 @@ def slicev(self, node): def visit_Subscript(self, node): if isinstance(node.value, ast.Attribute): + # foo.bar[1] self.result.append(list(get_call_names(node.value))[0]) self.visit(node.value) self.slicev(node.slice) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index b41ae374..ef0a094a 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -39,14 +39,20 @@ def identify_triggers( cfg(CFG): CFG to find sources, sinks and sanitisers in. sources(tuple): list of sources, a source is a (source, sanitiser) tuple. sinks(tuple): list of sources, a sink is a (sink, sanitiser) tuple. + nosec_lines(set): lines with # nosec whitelisting Returns: Triggers tuple with sink and source nodes and a sanitiser node dict. """ assignment_nodes = filter_cfg_nodes(cfg, AssignmentNode) tainted_nodes = filter_cfg_nodes(cfg, TaintedNode) - tainted_trigger_nodes = [TriggerNode('Framework function URL parameter', None, - node) for node in tainted_nodes] + tainted_trigger_nodes = [ + TriggerNode( + 'Framework function URL parameter', + sanitisers=None, + cfg_node=node + ) for node in tainted_nodes + ] sources_in_file = find_triggers(assignment_nodes, sources, nosec_lines) sources_in_file.extend(tainted_trigger_nodes) @@ -136,6 +142,7 @@ def find_triggers( Args: nodes(list[Node]): the nodes to find triggers in. trigger_word_list(list[string]): list of trigger words to look for. 
+ nosec_lines(set): lines with # nosec whitelisting Returns: List of found TriggerNodes @@ -441,13 +448,14 @@ def find_vulnerabilities_in_cfg( ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. vulnerabilities_list(list): That we append to when we find vulnerabilities. + nosec_lines(dict): filenames mapped to their nosec lines """ triggers = identify_triggers( cfg, definitions.sources, definitions.sinks, lattice, - nosec_lines + nosec_lines[cfg.filename] ) for sink in triggers.sinks: for source in triggers.sources: @@ -468,8 +476,8 @@ def find_vulnerabilities( cfg_list, ui_mode, blackbox_mapping_file, - source_sink_file, - nosec_lines=set() + sources_and_sinks_file, + nosec_lines ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. @@ -477,13 +485,14 @@ def find_vulnerabilities( cfg_list(list[CFG]): the list of CFGs to scan. ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. blackbox_mapping_file(str) - source_sink_file(str) + sources_and_sinks_file(str) + nosec_lines(dict): filenames mapped to their nosec lines Returns: A list of vulnerabilities. 
""" vulnerabilities = list() - definitions = parse(source_sink_file) + definitions = parse(sources_and_sinks_file) with open(blackbox_mapping_file) as infile: blackbox_mapping = json.load(infile) diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index 1104de13..acef7bb0 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -162,7 +162,13 @@ def __str__(self): class TriggerNode(): - def __init__(self, trigger_word, sanitisers, cfg_node, secondary_nodes=[]): + def __init__( + self, + trigger_word, + sanitisers, + cfg_node, + secondary_nodes=[] + ): self.trigger_word = trigger_word self.sanitisers = sanitisers self.cfg_node = cfg_node diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index c0985723..86a6fa72 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -39,11 +39,15 @@ def run_analysis(self, path): analyse(cfg_list) + nosec_lines = { + path: [] + } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - default_trigger_word_file + default_trigger_word_file, + nosec_lines ) def test_find_vulnerabilities_absolute_from_file_command_injection(self): diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 4c0dd2eb..2bc1c9a2 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -127,11 +127,15 @@ def run_analysis(self, path): analyse(cfg_list) + nosec_lines = { + path: [] + } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - default_trigger_word_file + default_trigger_word_file, + nosec_lines ) def test_find_vulnerabilities_assign_other_var(self): @@ -516,11 +520,15 @@ def run_analysis(self, path): 
'django_trigger_words.pyt' ) + nosec_lines = { + path: [] + } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - trigger_word_file + trigger_word_file, + nosec_lines ) def test_django_view_param(self): @@ -558,11 +566,15 @@ def run_analysis(self, path): 'all_trigger_words.pyt' ) + nosec_lines = { + path: [] + } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - trigger_word_file + trigger_word_file, + nosec_lines ) def test_self_is_not_tainted(self): From e1fd47d6af0c46c430354a913090154be02bec12 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 30 Jun 2018 15:00:52 -0700 Subject: [PATCH 181/291] So nobody sees no docs/ folder and says we don't have any docs :] --- docs/README.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/README.rst diff --git a/docs/README.rst b/docs/README.rst new file mode 100644 index 00000000..173a79d5 --- /dev/null +++ b/docs/README.rst @@ -0,0 +1,3 @@ +`Start here`_. + +.. _Start here: https://github.com/python-security/pyt/tree/master/pyt From 4fb9d3e895fccf0f6ddb7ba6c29326ba8bea5ab7 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 30 Jun 2018 15:05:49 -0700 Subject: [PATCH 182/291] So nobody sees no docs/ folder and says we don't have any docs :] --- docs/README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/README.rst b/docs/README.rst index 173a79d5..31e35bb7 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -1,3 +1,6 @@ `Start here`_. +There is also `some documentation here`_, but it might be less helpful. + .. _Start here: https://github.com/python-security/pyt/tree/master/pyt +.. 
_some documentation here: http://pyt.readthedocs.io/en/latest/?badge=latest From 3dff5cf165bd8419b62d77bdec3d2152433b2ed4 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 30 Jun 2018 20:03:55 -0700 Subject: [PATCH 183/291] Use defaultdict for nosec_lines where applicable --- pyt/__main__.py | 3 ++- pyt/vulnerabilities/vulnerabilities.py | 3 ++- .../vulnerabilities_across_files_test.py | 6 +----- tests/vulnerabilities/vulnerabilities_test.py | 18 +++--------------- 4 files changed, 8 insertions(+), 22 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index d3f94251..a979eb6e 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -2,6 +2,7 @@ import os import sys +from collections import defaultdict from .analysis.constraint_table import initialize_constraint_table from .analysis.fixed_point import analyse @@ -75,7 +76,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 args.recursive ) - nosec_lines = dict() + nosec_lines = defaultdict(set) for path in files: if not args.ignore_nosec: diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index ef0a094a..772a5a38 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -2,6 +2,7 @@ import ast import json +from collections import defaultdict from ..analysis.definition_chains import build_def_use_chain from ..analysis.lattice import Lattice @@ -477,7 +478,7 @@ def find_vulnerabilities( ui_mode, blackbox_mapping_file, sources_and_sinks_file, - nosec_lines + nosec_lines=defaultdict(set) ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. 
diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index 86a6fa72..c0985723 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -39,15 +39,11 @@ def run_analysis(self, path): analyse(cfg_list) - nosec_lines = { - path: [] - } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - default_trigger_word_file, - nosec_lines + default_trigger_word_file ) def test_find_vulnerabilities_absolute_from_file_command_injection(self): diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 2bc1c9a2..4c0dd2eb 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -127,15 +127,11 @@ def run_analysis(self, path): analyse(cfg_list) - nosec_lines = { - path: [] - } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - default_trigger_word_file, - nosec_lines + default_trigger_word_file ) def test_find_vulnerabilities_assign_other_var(self): @@ -520,15 +516,11 @@ def run_analysis(self, path): 'django_trigger_words.pyt' ) - nosec_lines = { - path: [] - } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - trigger_word_file, - nosec_lines + trigger_word_file ) def test_django_view_param(self): @@ -566,15 +558,11 @@ def run_analysis(self, path): 'all_trigger_words.pyt' ) - nosec_lines = { - path: [] - } return find_vulnerabilities( cfg_list, UImode.NORMAL, default_blackbox_mapping_file, - trigger_word_file, - nosec_lines + trigger_word_file ) def test_self_is_not_tainted(self): From 920f7e642085a92081932570c091c7c192db5fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 1 Jul 2018 18:22:02 +0300 Subject: [PATCH 184/291] new class for discover_files tests --- tests/main_test.py | 
20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 2f7fe5ae..b55713b5 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,7 +1,7 @@ import mock from .base_test_case import BaseTestCase -from pyt.__main__ import main,discover_files +from pyt.__main__ import discover_files, main class MainTest(BaseTestCase): @@ -61,32 +61,34 @@ def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args mock_parse_args.return_value.output_file ) + +class MainTest(BaseTestCase): def test_targets_with_no_excluded(self): targets = ["examples/vulnerable_code/inter_command_injection.py"] excluded_files = "" - included_files = discover_files(targets,excluded_files) + included_files = discover_files(targets, excluded_files) expected = ["examples/vulnerable_code/inter_command_injection.py"] - self.assertListEqual(included_files,expected) + self.assertListEqual(included_files, expected) def test_targets_with_exluded(self): targets = ["examples/vulnerable_code/inter_command_injection.py"] excluded_files = "examples/vulnerable_code/inter_command_injection.py" - included_files = discover_files(targets,excluded_files) + included_files = discover_files(targets, excluded_files) expected = [] - self.assertListEqual(included_files,expected) + self.assertListEqual(included_files, expected) def test_targets_with_recursive(self): targets = ["examples/vulnerable_code/"] excluded_files = "" - included_files = discover_files(targets,excluded_files,True) - self.assertEqual(len(included_files),30) + included_files = discover_files(targets, excluded_files, True) + self.assertEqual(len(included_files), 30) def test_targets_with_recursive_and_excluded(self): targets = ["examples/vulnerable_code/"] excluded_files = "inter_command_injection.py" - included_files = discover_files(targets,excluded_files,True) - self.assertEqual(len(included_files),29) + included_files = discover_files(targets, 
excluded_files, True) + self.assertEqual(len(included_files), 29) From 3c4d7734a44ec37b15a5f8e8e872f0af6c1af959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20G=C3=BCnal?= Date: Sun, 1 Jul 2018 18:27:49 +0300 Subject: [PATCH 185/291] Update CHANGELOG.md --- CHANGELOG.md | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d8c48bd..ba37a814 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,48 +22,25 @@ If you love PyT, please star our project on GitHub to show your support! :star: [@xxxx]: https://github.com/xxxx --> -# 0.34 -##### April 24, 2018 +# 0.35 +##### July 1, 2018 #### :tada: New Features -* Baseline support ([#106], thanks [@omergunal]) -* Whitelist lines ending in # nosec ([#121], [@omergunal]) * Recursive option ([#129], [@omergunal]) [@omergunal]: https://github.com/omergunal -[#106]: https://github.com/python-security/pyt/pull/106 [#129]: https://github.com/python-security/pyt/pull/129 -[#121]: https://github.com/python-security/pyt/pull/121 -#### :sparkles: Usability -* Combined all source/sink information files and made it the default ([#116]) - -#### :telescope: Precision -* Fixed a bug where `Post.query.paginate` propagated taint ([#115]) -* Fixed a false-positive where `self` was marked as taint ([#119], thanks [@lFatty]) #### :bug: Bugfixes -* Fixed a bug where `visit_Raise` raised a `TypeError` ([#117], thanks [@lFatty]) -* Fixed an infinite loop bug that was caused while handling certain loops ([#118]) -* Fixed a bug where we were not including `pyt/vulnerability_definitions` files ([#122], thanks [@Ekultek]) * Fixed flake8 errors ([#130]) #### :snake: Miscellaneous -* Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) ([#110], [#111]) * Re organize code ([#126]) * Cleaned up pyt/core/ ([#132]) -[#126]: https://github.com/python-security/pyt/pull/126 -[#116]: https://github.com/python-security/pyt/pull/116 
+[#126]: https://github.com/python-security/pyt/pull/129 [#115]: https://github.com/python-security/pyt/pull/115 -[#119]: https://github.com/python-security/pyt/pull/119 -[#117]: https://github.com/python-security/pyt/pull/117 -[#118]: https://github.com/python-security/pyt/pull/118 -[#111]: https://github.com/python-security/pyt/pull/111 -[#110]: https://github.com/python-security/pyt/pull/110 -[@lfatty]: https://github.com/lfatty -[#122]: https://github.com/python-security/pyt/issues/122 -[@Ekultek]: https://github.com/Ekultek [#130]: https://github.com/python-security/pyt/pull/130 [#132]: https://github.com/python-security/pyt/pull/132 From 7c8f02b7e759e81a532c4e978e13db80fa68ed0e Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 1 Jul 2018 12:22:42 -0700 Subject: [PATCH 186/291] Added Unreleased since 0.34 changes --- CHANGELOG.md | 61 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba37a814..011e12d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,25 +22,66 @@ If you love PyT, please star our project on GitHub to show your support! :star: [@xxxx]: https://github.com/xxxx --> -# 0.35 -##### July 1, 2018 +# Unreleased #### :tada: New Features -* Recursive option ([#129], [@omergunal]) +* Whitelist lines of sources and sinks ending in `# nosec` ([#121], thanks [@omergunal]) +* Ability to analyze directories, -r Recursive option ([#129], thanks [@omergunal]) -[@omergunal]: https://github.com/omergunal +[#121]: https://github.com/python-security/pyt/pull/121 [#129]: https://github.com/python-security/pyt/pull/129 -#### :bug: Bugfixes -* Fixed flake8 errors ([#130]) +#### :mortar_board: Walkthrough / Help + +* Added README.rst files to almost every directory. 
(Partially [#126]) #### :snake: Miscellaneous -* Re organize code ([#126]) -* Cleaned up pyt/core/ ([#132]) +* Fixed all flake8 errors ([#114] & [#130], thanks [@cclauss]) +* Re-organized the entire codebase into different directories ([#126]) +* Cleaned up the new pyt/core/ folder ([#132]) -[#126]: https://github.com/python-security/pyt/pull/129 -[#115]: https://github.com/python-security/pyt/pull/115 +[#126]: https://github.com/python-security/pyt/pull/126 +[#114]: https://github.com/python-security/pyt/pull/114 [#130]: https://github.com/python-security/pyt/pull/130 +[@cclauss]: https://github.com/cclauss [#132]: https://github.com/python-security/pyt/pull/132 + + +# 0.34 +##### April 24, 2018 + +#### :tada: New Features + +* Baseline support ([#106], thanks [@omergunal]) + +[#106]: https://github.com/python-security/pyt/pull/106 +[@omergunal]: https://github.com/omergunal + +#### :sparkles: Usability +* Combined all source/sink information files and made it the default ([#116]) + +#### :telescope: Precision +* Fixed a bug where `Post.query.paginate` propagated taint ([#115]) +* Fixed a false-positive where `self` was marked as taint ([#119], thanks [@lFatty]) + +#### :bug: Bugfixes +* Fixed a bug where `visit_Raise` raised a `TypeError` ([#117], thanks [@lFatty]) +* Fixed an infinite loop bug that was caused while handling certain loops ([#118]) +* Fixed a bug where we were not including `pyt/vulnerability_definitions` files ([#122], thanks [@Ekultek]) + +#### :snake: Miscellaneous + +* Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) ([#110], [#111]) + +[#116]: https://github.com/python-security/pyt/pull/116 +[#115]: https://github.com/python-security/pyt/pull/115 +[#119]: https://github.com/python-security/pyt/pull/119 +[#117]: https://github.com/python-security/pyt/pull/117 +[#118]: https://github.com/python-security/pyt/pull/118 +[#111]: https://github.com/python-security/pyt/pull/111 +[#110]: 
https://github.com/python-security/pyt/pull/110 +[@lfatty]: https://github.com/lfatty +[#122]: https://github.com/python-security/pyt/issues/122 +[@Ekultek]: https://github.com/Ekultek From 73db6281a0c360f1c2a027f0e9c1e9fa1d77dce0 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 1 Jul 2018 12:23:38 -0700 Subject: [PATCH 187/291] Fix DiscoverFilesTest class name --- tests/main_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/main_test.py b/tests/main_test.py index b55713b5..4839da51 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -62,7 +62,7 @@ def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args ) -class MainTest(BaseTestCase): +class DiscoverFilesTest(BaseTestCase): def test_targets_with_no_excluded(self): targets = ["examples/vulnerable_code/inter_command_injection.py"] excluded_files = "" From d8a90530fc61938d26b9ebbd091a2096571e01eb Mon Sep 17 00:00:00 2001 From: Tin Lam Date: Fri, 6 Jul 2018 03:33:50 -0500 Subject: [PATCH 188/291] Add unit test cases This patch set adds unit tests and increases coverage for `vars_visitor.py`. This also patch set also fix a slight typo in `vars_visitor.py`. 
Signed-off-by: Tin Lam --- pyt/helper_visitors/vars_visitor.py | 2 +- tests/helper_visitors/vars_visitor_test.py | 76 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index 22348528..44366d84 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -25,7 +25,7 @@ def visit_UnaryOp(self, node): def visit_Lambda(self, node): self.visit(node.body) - def visit_IfExpr(self, node): + def visit_IfExp(self, node): self.visit(node.test) self.visit(node.body) self.visit(node.orelse) diff --git a/tests/helper_visitors/vars_visitor_test.py b/tests/helper_visitors/vars_visitor_test.py index 849206b8..a354c892 100644 --- a/tests/helper_visitors/vars_visitor_test.py +++ b/tests/helper_visitors/vars_visitor_test.py @@ -59,3 +59,79 @@ def test_keyword_numarg(self): def test_subscript(self): vars = self.perform_vars_on_expression('l[a] = x + y') self.assertEqual(vars.result, ['l', 'a', 'x', 'y']) + + def test_visit_boolop(self): + # AND operator + var1 = self.perform_vars_on_expression('b = x and y') + self.assertEqual(var1.result, ['b', 'x', 'y']) + + # OR operator + var2 = self.perform_vars_on_expression('b = x or y') + self.assertEqual(var2.result, ['b', 'x', 'y']) + + def test_visit_unaryop(self): + vars = self.perform_vars_on_expression('a = not b') + self.assertEqual(vars.result, ['a', 'b']) + + def test_visit_lambda(self): + vars = self.perform_vars_on_expression('f = lambda x: x + 2') + self.assertEqual(vars.result, ['f', 'x']) + + def test_visit_set(self): + vars = self.perform_vars_on_expression('{a, b, c}') + self.assertEqual(vars.result, ['a', 'b', 'c']) + + def test_visit_tuple(self): + vars = self.perform_vars_on_expression('(a, b, c)') + self.assertEqual(vars.result, ['a', 'b', 'c']) + + def test_visit_list(self): + vars = self.perform_vars_on_expression('[a, b, c]') + self.assertEqual(vars.result, ['a', 'b', 'c']) + + def 
test_visit_yield(self): + var1 = self.perform_vars_on_expression('yield exp') + self.assertEqual(var1.result, ['exp']) + + var2 = self.perform_vars_on_expression('yield from exp') + self.assertEqual(var2.result, ['exp']) + + def test_visit_listcomp(self): + vars = self.perform_vars_on_expression( + '[item for item in coll if cond]') + self.assertEqual(vars.result, ['item', 'item', 'coll', 'cond']) + + def test_visit_setcomp(self): + vars = self.perform_vars_on_expression('{a for b in d}') + self.assertEqual(vars.result, ['a', 'b', 'd']) + + def test_visit_dictcomp(self): + vars = self.perform_vars_on_expression('{k1: v1 for (k2, v2) in d}') + self.assertEqual(vars.result, ['k1', 'v1', 'k2', 'v2', 'd']) + + def test_visit_compare(self): + vars = self.perform_vars_on_expression('a == b') + self.assertEqual(vars.result, ['a', 'b']) + + def test_visit_starred(self): + vars = self.perform_vars_on_expression('*m = t') + self.assertEqual(vars.result, ['m', 't']) + + def test_visit_ifexp(self): + vars = self.perform_vars_on_expression('res if test else orelse') + self.assertEqual(vars.result, ['test', 'res', 'orelse']) + + def test_visit_subscript(self): + # simple slice + vars = self.perform_vars_on_expression('foo.bar[lower:upper:step]') + self.assertEqual(vars.result, ['foo', 'foo', 'lower', 'upper', 'step']) + + # extended slice + vars = self.perform_vars_on_expression('foo[item1:item2, item3]') + self.assertEqual(vars.result, ['foo', 'item1', 'item2', 'item3']) + + def test_visit_await(self): + vars = self.perform_vars_on_expression(""" +async def bar(): + await foo()""") + self.assertEqual(vars.result, []) From 10eea2291d7c2644422f63834fa641725184bdf6 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 7 Jul 2018 11:48:14 -0700 Subject: [PATCH 189/291] Raise minimum pyt/ coverage to 91%, indent and lstrip multiline visit_await test string --- tests/helper_visitors/vars_visitor_test.py | 5 +++-- tox.ini | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff 
--git a/tests/helper_visitors/vars_visitor_test.py b/tests/helper_visitors/vars_visitor_test.py index a354c892..34597afd 100644 --- a/tests/helper_visitors/vars_visitor_test.py +++ b/tests/helper_visitors/vars_visitor_test.py @@ -132,6 +132,7 @@ def test_visit_subscript(self): def test_visit_await(self): vars = self.perform_vars_on_expression(""" -async def bar(): - await foo()""") + async def bar(): + await foo() + """.lstrip()) self.assertEqual(vars.result, []) diff --git a/tox.ini b/tox.ini index 4d5f66e8..aa701c72 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ commands = coverage erase coverage run tests coverage report --include=tests/* --fail-under 100 - coverage report --include=pyt/* --fail-under 87 + coverage report --include=pyt/* --fail-under 91 pre-commit run flake8 . --count --exclude=examples,venv,.tox --select=E901,E999,F821,F822,F823 --show-source --statistics flake8 . --count --exclude=examples,venv,.tox,dist --exit-zero --max-complexity=10 --max-line-length=127 --statistics From 6b3f155504cb7e087871c0eb622deef3c3e61c08 Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 14:19:26 +0100 Subject: [PATCH 190/291] Vars visitor handles python 3.5 dict syntax of the type {'a': 1, **x}. The **x will have key None. Before it would crash. 
--- pyt/helper_visitors/vars_visitor.py | 3 ++- tests/helper_visitors/vars_visitor_test.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index 44366d84..272744d8 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -32,7 +32,8 @@ def visit_IfExp(self, node): def visit_Dict(self, node): for k in node.keys: - self.visit(k) + if k is not None: + self.visit(k) for v in node.values: self.visit(v) diff --git a/tests/helper_visitors/vars_visitor_test.py b/tests/helper_visitors/vars_visitor_test.py index 34597afd..f248b6c4 100644 --- a/tests/helper_visitors/vars_visitor_test.py +++ b/tests/helper_visitors/vars_visitor_test.py @@ -136,3 +136,7 @@ async def bar(): await foo() """.lstrip()) self.assertEqual(vars.result, []) + + def test_visit_dict(self): + vars = self.perform_vars_on_expression('a = {k1: v1, k2: v2, **d1, **d2}') + self.assertEqual(vars.result, ['a', 'k1', 'k2', 'v1', 'v2', 'd1', 'd2']) From c292f1724279a54b52ae42e8f80dd70200f79c0c Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 14:37:57 +0100 Subject: [PATCH 191/291] Support AnnAssign in StmtVisitor Assignments with type annotations were added in python 3.6. 
--- .../assignment_with_annotation.py | 2 ++ pyt/cfg/stmt_visitor.py | 8 ++++++++ tests/cfg/cfg_test.py | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 examples/example_inputs/assignment_with_annotation.py diff --git a/examples/example_inputs/assignment_with_annotation.py b/examples/example_inputs/assignment_with_annotation.py new file mode 100644 index 00000000..811876f8 --- /dev/null +++ b/examples/example_inputs/assignment_with_annotation.py @@ -0,0 +1,2 @@ +x: int +y: int=5 diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index c10548c0..68cb5587 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -422,6 +422,14 @@ def visit_Assign(self, node): path=self.filenames[-1] )) + def visit_AnnAssign(self, node): + if node.value is None: + return IgnoredNode() + else: + assign = ast.Assign(targets=[node.target], value=node.value) + ast.copy_location(assign, node) + return self.visit(assign) + def assignment_call_node(self, left_hand_label, ast_node): """Handle assignments that contain a function call on its right side.""" self.undecided = True # Used for handling functions in assignments diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index a42ac4e0..ece76213 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -720,6 +720,25 @@ def test_assignment_and_builtin_line_numbers(self): self.assertLineNumber(assign, 1) self.assertLineNumber(builtin, 2) + def test_assignment_with_annotation(self): + self.cfg_create_from_file('examples/example_inputs/assignment_with_annotation.py') + + self.assert_length(self.cfg.nodes, expected_length=3) + + entry = 0 + assign = 1 + exit_node = 2 + + self.assertInCfg([(assign, entry), (exit_node, assign)]) + self.assertEqual(self.cfg.nodes[assign].label, 'y = 5') + + def test_assignment_with_annotation_line_numbers(self): + self.cfg_create_from_file('examples/example_inputs/assignment_with_annotation.py') + + assign = self.cfg.nodes[1] + + 
self.assertLineNumber(assign, 2) + def test_multiple_assignment(self): self.cfg_create_from_file('examples/example_inputs/assignment_multiple_assign.py') From 5fec81b79cb7b8ba96abe7fb4df02d8d6b5a0bef Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 15:04:54 +0100 Subject: [PATCH 192/291] Fix typo in codeclimate yaml config --- .codeclimate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codeclimate.yml b/.codeclimate.yml index 4113b0e7..069b0ba4 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -23,7 +23,7 @@ exclude_paths: - "pyt/intraprocedural_cfg.py" - "pyt/repo_runner.py" - "pyt/save.py" -- "example/**" +- "examples/**" - "profiling/**" - "tests/**" - "LICENSE" From 67004afa7161349d38092a1564b4e07dabbe98fa Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 13:57:30 +0100 Subject: [PATCH 193/291] Add f-strings to label visitor Node types FormattedValue and JoinedStr were added in python 3.6. --- pyt/helper_visitors/label_visitor.py | 32 +++++++++++++++++++++ tests/helper_visitors/label_visitor_test.py | 8 ++++++ 2 files changed, 40 insertions(+) diff --git a/pyt/helper_visitors/label_visitor.py b/pyt/helper_visitors/label_visitor.py index ab2301b9..f594bff8 100644 --- a/pyt/helper_visitors/label_visitor.py +++ b/pyt/helper_visitors/label_visitor.py @@ -288,3 +288,35 @@ def visit_Name(self, node): def visit_Str(self, node): self.result += "'" + node.s + "'" + + def visit_joined_str(self, node, surround=True): + for val in node.values: + if isinstance(val, ast.Str): + self.result += val.s + else: + self.visit(val) + + def visit_JoinedStr(self, node): + """ + JoinedStr(expr* values) + """ + self.result += "f\'" + self.visit_joined_str(node) + self.result += "'" + + def visit_FormattedValue(self, node): + """ + FormattedValue(expr value, int? conversion, expr? 
format_spec) + """ + self.result += '{' + self.visit(node.value) + self.result += { + -1: '', # no formatting + 97: '!a', # ascii formatting + 114: '!r', # repr formatting + 115: '!s', # string formatting + }[node.conversion] + if node.format_spec: + self.result += ':' + self.visit_joined_str(node.format_spec) + self.result += '}' diff --git a/tests/helper_visitors/label_visitor_test.py b/tests/helper_visitors/label_visitor_test.py index 0f2d2f7d..e39f5151 100644 --- a/tests/helper_visitors/label_visitor_test.py +++ b/tests/helper_visitors/label_visitor_test.py @@ -71,3 +71,11 @@ def test_list_one_element(self): def test_list_two_elements(self): label = self.perform_labeling_on_expression('[1, 2]') self.assertEqual(label.result, '[1, 2]') + + def test_joined_str(self): + label = self.perform_labeling_on_expression('f"a{f(b)}{c}d"') + self.assertEqual(label.result, 'f\'a{f(b)}{c}d\'') + + def test_joined_str_with_format_spec(self): + label = self.perform_labeling_on_expression('f"a{b!s:.{length}}"') + self.assertEqual(label.result, 'f\'a{b!s:.{length}}\'') From 83e496f78a3d9ec295539ffcefc5614cab4bb919 Mon Sep 17 00:00:00 2001 From: Ben Caller Date: Thu, 19 Jul 2018 15:39:42 +0100 Subject: [PATCH 194/291] Make get_call_names more resilient Code where we call a method of something which isn't a variable name e.g. ``` yesterday = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d") ``` was causing pyt to crash. Previously the else branch would only handle ast.Attribute, and crash on everything else. 
``` Traceback (most recent call last): File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main "__main__", mod_spec) File "/usr/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/pyt/pyt/__main__.py", line 141, in main() File "/pyt/pyt/__main__.py", line 97, in main path File "/pyt/pyt/cfg/make_cfg.py", line 40, in make_cfg module_definitions File "/pyt/pyt/cfg/expr_visitor.py", line 60, in __init__ self.init_cfg(node) File "/pyt/pyt/cfg/expr_visitor.py", line 67, in init_cfg module_statements = self.visit(node) File "/usr/lib/python3.6/ast.py", line 253, in visit return visitor(node) File "/pyt/pyt/cfg/stmt_visitor.py", line 58, in visit_Module return self.stmt_star_handler(node.body) File "/pyt/pyt/cfg/stmt_visitor.py", line 79, in stmt_star_handler node = self.visit(stmt) File "/usr/lib/python3.6/ast.py", line 253, in visit return visitor(node) File "/pyt/pyt/cfg/stmt_visitor.py", line 413, in visit_Assign return self.assignment_call_node(label.result, node) File "/pyt/pyt/cfg/stmt_visitor.py", line 437, in assignment_call_node call = self.visit(ast_node.value) File "/usr/lib/python3.6/ast.py", line 253, in visit return visitor(node) File "/pyt/pyt/cfg/expr_visitor.py", line 540, in visit_Call _id = get_call_names_as_string(node.func) File "/pyt/pyt/core/ast_helper.py", line 78, in get_call_names_as_string return _list_to_dotted_string(get_call_names(node)) File "/pyt/pyt/core/ast_helper.py", line 68, in get_call_names return reversed(_get_call_names_helper(node, result)) File "/pyt/pyt/core/ast_helper.py", line 62, in _get_call_names_helper return _get_call_names_helper(node.value, result) File "/pyt/pyt/core/ast_helper.py", line 61, in _get_call_names_helper result.append(node.attr) AttributeError: 'BinOp' object has no attribute 'attr' ``` --- pyt/core/ast_helper.py | 21 ++++++++------------- tests/cfg/import_test.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git 
a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index dc4f8195..2b8776a5 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -44,28 +44,23 @@ def generate_ast(path): raise IOError('Input needs to be a file. Path: ' + path) -def _get_call_names_helper(node, result): +def _get_call_names_helper(node): """Recursively finds all function names.""" if isinstance(node, ast.Name): if node.id not in BLACK_LISTED_CALL_NAMES: - result.append(node.id) - return result - elif isinstance(node, ast.Call): - return result + yield node.id elif isinstance(node, ast.Subscript): - return _get_call_names_helper(node.value, result) + yield from _get_call_names_helper(node.value) elif isinstance(node, ast.Str): - result.append(node.s) - return result - else: - result.append(node.attr) - return _get_call_names_helper(node.value, result) + yield node.s + elif isinstance(node, ast.Attribute): + yield node.attr + yield from _get_call_names_helper(node.value) def get_call_names(node): """Get a list of call names.""" - result = list() - return reversed(_get_call_names_helper(node, result)) + return reversed(list(_get_call_names_helper(node))) def _list_to_dotted_string(list_of_components): diff --git a/tests/cfg/import_test.py b/tests/cfg/import_test.py index baa4d6e1..b4f00e73 100644 --- a/tests/cfg/import_test.py +++ b/tests/cfg/import_test.py @@ -733,3 +733,19 @@ def test_get_call_names_multi(self): result = get_call_names_as_string(call.func) self.assertEqual(result, 'abc.defg.hi') + + def test_get_call_names_with_binop(self): + m = ast.parse('(date.today() - timedelta(days=1)).strftime("%Y-%m-%d")') + call = m.body[0].value + + result = get_call_names_as_string(call.func) + + self.assertEqual(result, 'strftime') + + def test_get_call_names_with_comprehension(self): + m = ast.parse('{a for a in b()}.union(c)') + call = m.body[0].value + + result = get_call_names_as_string(call.func) + + self.assertEqual(result, 'union') From 93995b6851424b890db3fe7595673bc6112bf15f Mon Sep 
17 00:00:00 2001 From: bcaller Date: Wed, 18 Jul 2018 15:32:08 +0100 Subject: [PATCH 195/291] Improve string comparison failure message in unit tests self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) just said False should be True without giving more context. --- .../vulnerabilities_across_files_test.py | 8 +++--- .../vulnerabilities_base_test_case.py | 7 +++++ tests/vulnerabilities/vulnerabilities_test.py | 28 +++++++++---------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index c0985723..70529f0d 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -83,7 +83,7 @@ def test_blackbox_library_call(self): Label: ~call_2 = ret_scrypt.encrypt('echo ' + param + ' >> ' + 'menu.txt', 'password') """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_builtin_with_user_defined_inner(self): vulnerabilities = self.run_analysis('examples/nested_functions_code/builtin_with_user_defined_inner.py') @@ -117,7 +117,7 @@ def test_builtin_with_user_defined_inner(self): ~call_3 = ret_subprocess.call(foo, shell=True) This vulnerability is unknown due to: Label: ~call_1 = ret_scrypt.encrypt(~call_2) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_sink_with_result_of_blackbox_nested(self): vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_result_of_blackbox_nested.py') @@ -203,7 +203,7 @@ def test_sink_with_result_of_user_defined_nested(self): > reaches line 18, sink 
"subprocess.call(": ~call_3 = ret_subprocess.call(result, shell=True) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_sink_with_blackbox_inner(self): vulnerabilities = self.run_analysis('examples/nested_functions_code/sink_with_blackbox_inner.py') @@ -284,7 +284,7 @@ def test_sink_with_user_defined_inner(self): > reaches line 18, sink "subprocess.call(": ~call_1 = ret_subprocess.call(~call_2, shell=True) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_find_vulnerabilities_import_file_command_injection(self): vulnerabilities = self.run_analysis('examples/vulnerable_code_across_files/import_file_command_injection.py') diff --git a/tests/vulnerabilities/vulnerabilities_base_test_case.py b/tests/vulnerabilities/vulnerabilities_base_test_case.py index dcf088a4..c21f81ed 100644 --- a/tests/vulnerabilities/vulnerabilities_base_test_case.py +++ b/tests/vulnerabilities/vulnerabilities_base_test_case.py @@ -8,3 +8,10 @@ def string_compare_alpha(self, output, expected_string): [char for char in output if char.isalpha()] == [char for char in expected_string if char.isalpha()] ) + + def assertAlphaEqual(self, output, expected_string): + self.assertEqual( + [char for char in output if char.isalpha()], + [char for char in expected_string if char.isalpha()] + ) + return True diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 4c0dd2eb..c99e137f 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -168,7 +168,7 @@ def test_XSS_result(self): ~call_4 = ret_html.replace('{{ param }}', param) """ - 
self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_command_injection_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/command_injection.py') @@ -186,7 +186,7 @@ def test_command_injection_result(self): ~call_1 = ret_subprocess.call(command, shell=True) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_path_traversal_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal.py') @@ -224,7 +224,7 @@ def test_path_traversal_result(self): ~call_4 = ret_send_file(foo) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_ensure_saved_scope(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/ensure_saved_scope.py') @@ -262,7 +262,7 @@ def test_ensure_saved_scope(self): ~call_4 = ret_send_file(image_name) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_path_traversal_sanitised_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised.py') @@ -289,7 +289,7 @@ def test_path_traversal_sanitised_result(self): This vulnerability is sanitised by: Label: ~call_2 = ret_image_name.replace('..', '') """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_path_traversal_sanitised_2_result(self): 
vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised_2.py') @@ -312,7 +312,7 @@ def test_path_traversal_sanitised_2_result(self): This vulnerability is potentially sanitised by: Label: if '..' in image_name: """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_sql_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/sql/sqli.py') @@ -332,7 +332,7 @@ def test_sql_result(self): ~call_2 = ret_db.engine.execute(param) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_XSS_form_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_form.py') @@ -354,7 +354,7 @@ def test_XSS_form_result(self): ~call_2 = ret_html1.replace('{{ data }}', data) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_XSS_url_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_url.py') @@ -378,7 +378,7 @@ def test_XSS_url_result(self): ~call_3 = ret_html.replace('{{ param }}', param) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_XSS_no_vuln_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_no_vuln.py') @@ -408,7 +408,7 @@ def test_XSS_reassign_result(self): ~call_4 = ret_html.replace('{{ param }}', param) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + 
self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_XSS_sanitised_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_sanitised.py') @@ -437,7 +437,7 @@ def test_XSS_sanitised_result(self): This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_XSS_variable_assign_no_vuln_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_assign_no_vuln.py') @@ -467,7 +467,7 @@ def test_XSS_variable_assign_result(self): ~call_4 = ret_html.replace('{{ param }}', other_var) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) def test_XSS_variable_multiple_assign_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/XSS_variable_multiple_assign.py') @@ -497,7 +497,7 @@ def test_XSS_variable_multiple_assign_result(self): ~call_4 = ret_html.replace('{{ param }}', another_one) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) class EngineDjangoTest(VulnerabilitiesBaseTestCase): @@ -539,7 +539,7 @@ def test_django_view_param(self): > reaches line 5, sink "render(": ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) """ - self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) class EngineEveryTest(VulnerabilitiesBaseTestCase): From 9186f9fde131c29f07c4ec6bc65bcb49cbaa127f Mon Sep 17 00:00:00 
2001 From: bcaller Date: Wed, 18 Jul 2018 15:32:15 +0100 Subject: [PATCH 196/291] JSON trigger file Allows specifying extra sink options. --- .../trigger_definitions_parser.py | 63 +++++++-------- pyt/vulnerabilities/vulnerabilities.py | 19 ++--- pyt/vulnerabilities/vulnerability_helper.py | 14 +++- .../all_trigger_words.pyt | 80 +++++++++++-------- .../django_trigger_words.pyt | 76 ++++++++++-------- .../flask_trigger_words.pyt | 58 ++++++++------ .../test_triggers.pyt | 27 +++++-- tests/vulnerabilities/vulnerabilities_test.py | 42 ++++------ 8 files changed, 209 insertions(+), 170 deletions(-) diff --git a/pyt/vulnerabilities/trigger_definitions_parser.py b/pyt/vulnerabilities/trigger_definitions_parser.py index 62cdbce0..b188e280 100644 --- a/pyt/vulnerabilities/trigger_definitions_parser.py +++ b/pyt/vulnerabilities/trigger_definitions_parser.py @@ -1,10 +1,7 @@ +import json from collections import namedtuple -SANITISER_SEPARATOR = '->' -SOURCES_KEYWORD = 'sources:' -SINKS_KEYWORD = 'sinks:' - Definitions = namedtuple( 'Definitions', ( @@ -13,30 +10,30 @@ ) ) +Source = namedtuple('Source', ('trigger_word')) -def parse_section(iterator): - """Parse a section of a file. Stops at empty line. - Args: - iterator(File): file descriptor pointing at a definition file. +class Sink: + def __init__( + self, trigger, *, + sanitisers=None + ): + self._trigger = trigger + self.sanitisers = sanitisers or [] - Returns: - Iterator of all definitions in the section. 
- """ - try: - line = next(iterator).rstrip() - while line: - if line.rstrip(): - if SANITISER_SEPARATOR in line: - line = line.split(SANITISER_SEPARATOR) - sink = line[0].rstrip() - sanitisers = list(map(str.strip, line[1].split(','))) - yield (sink, sanitisers) - else: - yield (line, list()) - line = next(iterator).rstrip() - except StopIteration: - return + @property + def call(self): + if self._trigger[-1] == '(': + return self._trigger[:-1] + return None + + @property + def trigger_word(self): + return self._trigger + + @classmethod + def from_json(cls, key, data): + return cls(trigger=key, **data) def parse(trigger_word_file): @@ -45,13 +42,11 @@ def parse(trigger_word_file): Returns: A definitions tuple with sources and sinks. """ - sources = list() - sinks = list() - with open(trigger_word_file, 'r') as fd: - for line in fd: - line = line.rstrip() - if line == SOURCES_KEYWORD: - sources = list(parse_section(fd)) - elif line == SINKS_KEYWORD: - sinks = list(parse_section(fd)) + with open(trigger_word_file) as fd: + triggers_dict = json.load(fd) + sources = [Source(s) for s in triggers_dict['sources']] + sinks = [ + Sink.from_json(trigger, data) + for trigger, data in triggers_dict['sinks'].items() + ] return Definitions(sources, sinks) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 772a5a38..64b11892 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -16,7 +16,7 @@ RHSVisitor, VarsVisitor ) -from .trigger_definitions_parser import parse +from .trigger_definitions_parser import parse, Source from .vulnerability_helper import ( Sanitiser, TriggerNode, @@ -49,8 +49,7 @@ def identify_triggers( tainted_nodes = filter_cfg_nodes(cfg, TaintedNode) tainted_trigger_nodes = [ TriggerNode( - 'Framework function URL parameter', - sanitisers=None, + Source('Framework function URL parameter'), cfg_node=node ) for node in tainted_nodes ] @@ -142,7 +141,7 @@ def 
find_triggers( Args: nodes(list[Node]): the nodes to find triggers in. - trigger_word_list(list[string]): list of trigger words to look for. + trigger_word_list(list[Union[Sink, Source]]): list of trigger words to look for. nosec_lines(set): lines with # nosec whitelisting Returns: @@ -157,23 +156,21 @@ def find_triggers( def label_contains( node, - trigger_words + triggers ): """Determine if node contains any of the trigger_words provided. Args: node(Node): CFG node to check. - trigger_words(list[string]): list of trigger words to look for. + trigger_words(list[Union[Sink, Source]]): list of trigger words to look for. Returns: Iterable of TriggerNodes found. Can be multiple because multiple trigger_words can be in one node. """ - for trigger_word_tuple in trigger_words: - if trigger_word_tuple[0] in node.label: - trigger_word = trigger_word_tuple[0] - sanitisers = trigger_word_tuple[1] - yield TriggerNode(trigger_word, sanitisers, node) + for trigger in triggers: + if trigger.trigger_word in node.label: + yield TriggerNode(trigger, node) def build_sanitiser_node_dict( diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index acef7bb0..80a37491 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -164,16 +164,22 @@ def __str__(self): class TriggerNode(): def __init__( self, - trigger_word, - sanitisers, + trigger, cfg_node, secondary_nodes=[] ): - self.trigger_word = trigger_word - self.sanitisers = sanitisers + self.trigger = trigger self.cfg_node = cfg_node self.secondary_nodes = secondary_nodes + @property + def trigger_word(self): + return self.trigger.trigger_word + + @property + def sanitisers(self): + return self.trigger.sanitisers if hasattr(self.trigger, 'sanitisers') else [] + def append(self, cfg_node): if not cfg_node == self.cfg_node: if self.secondary_nodes and cfg_node not in self.secondary_nodes: diff --git 
a/pyt/vulnerability_definitions/all_trigger_words.pyt b/pyt/vulnerability_definitions/all_trigger_words.pyt index 656c8386..839ad6a4 100644 --- a/pyt/vulnerability_definitions/all_trigger_words.pyt +++ b/pyt/vulnerability_definitions/all_trigger_words.pyt @@ -1,34 +1,46 @@ -sources: -request.args.get( -Markup( -POST.get( -GET.get( -META.get( -POST[ -GET[ -META[ -FILES[ -.data -form[ -form( -mark_safe( -cookies[ -files[ -SQLAlchemy - -sinks: -replace( -> escape -send_file( -> '..', '..' in -execute( -system( -filter( -subprocess.call( -render_template( -set_cookie( -redirect( -url_for( -flash( -jsonify( -render( -render_to_response( -Popen( \ No newline at end of file +{ + "sources": [ + "request.args.get(", + "Markup(", + "POST.get(", + "GET.get(", + "META.get(", + "POST[", + "GET[", + "META[", + "FILES[", + ".data", + "form[", + "form(", + "mark_safe(", + "cookies[", + "files[", + "SQLAlchemy" + ], + "sinks": { + "replace(": { + "sanitisers": [ + "escape" + ] + }, + "send_file(": { + "sanitisers": [ + "'..'", + "'..' in" + ] + }, + "execute(": {}, + "system(": {}, + "filter(": {}, + "subprocess.call(": {}, + "render_template(": {}, + "set_cookie(": {}, + "redirect(": {}, + "url_for(": {}, + "flash(": {}, + "jsonify(": {}, + "render(": {}, + "render_to_response(": {}, + "Popen(": {} + } +} diff --git a/pyt/vulnerability_definitions/django_trigger_words.pyt b/pyt/vulnerability_definitions/django_trigger_words.pyt index 53b54f66..00131fe1 100644 --- a/pyt/vulnerability_definitions/django_trigger_words.pyt +++ b/pyt/vulnerability_definitions/django_trigger_words.pyt @@ -1,32 +1,44 @@ -sources: -POST.get( -GET.get( -META.get( -POST[ -GET[ -META[ -FILES[ -.data -form[ -form( -mark_safe( -cookies[ -files[ -SQLAlchemy - -sinks: -replace( -> escape -send_file( -> '..', '..' 
in -execute( -system( -filter( -subprocess.call( -render_template( -set_cookie( -redirect( -url_for( -flash( -jsonify( -render( -render_to_response( -Popen( \ No newline at end of file +{ + "sources": [ + "POST.get(", + "GET.get(", + "META.get(", + "POST[", + "GET[", + "META[", + "FILES[", + ".data", + "form[", + "form(", + "mark_safe(", + "cookies[", + "files[", + "SQLAlchemy" + ], + "sinks": { + "replace(": { + "sanitisers": [ + "escape" + ] + }, + "send_file(": { + "sanitisers": [ + "'..'", + "'..' in" + ] + }, + "execute(": {}, + "system(": {}, + "filter(": {}, + "subprocess.call(": {}, + "render_template(": {}, + "set_cookie(": {}, + "redirect(": {}, + "url_for(": {}, + "flash(": {}, + "jsonify(": {}, + "render(": {}, + "render_to_response(": {}, + "Popen(": {} + } +} diff --git a/pyt/vulnerability_definitions/flask_trigger_words.pyt b/pyt/vulnerability_definitions/flask_trigger_words.pyt index d7555a87..0dd49ebf 100644 --- a/pyt/vulnerability_definitions/flask_trigger_words.pyt +++ b/pyt/vulnerability_definitions/flask_trigger_words.pyt @@ -1,23 +1,35 @@ -sources: -request.args.get( -.data -form[ -form( -Markup( -cookies[ -files[ -SQLAlchemy - -sinks: -replace( -> escape -send_file( -> '..', '..' in -execute( -system( -filter( -subprocess.call( -render_template( -set_cookie( -redirect( -url_for( -flash( -jsonify( \ No newline at end of file +{ + "sources": [ + "request.args.get(", + ".data", + "form[", + "form(", + "Markup(", + "cookies[", + "files[", + "SQLAlchemy" + ], + "sinks": { + "replace(": { + "sanitisers": [ + "escape" + ] + }, + "send_file(": { + "sanitisers": [ + "'..'", + "'..' 
in" + ] + }, + "execute(": {}, + "system(": {}, + "filter(": {}, + "subprocess.call(": {}, + "render_template(": {}, + "set_cookie(": {}, + "redirect(": {}, + "url_for(": {}, + "flash(": {}, + "jsonify(": {} + } +} diff --git a/pyt/vulnerability_definitions/test_triggers.pyt b/pyt/vulnerability_definitions/test_triggers.pyt index cfa83a37..9388d539 100644 --- a/pyt/vulnerability_definitions/test_triggers.pyt +++ b/pyt/vulnerability_definitions/test_triggers.pyt @@ -1,7 +1,20 @@ -sources: -input - -sinks: -eval -> sanitise -horse -> japan, host, kost -valmue +{ + "sources": [ + "input" + ], + "sinks": { + "eval(": { + "sanitisers": [ + "sanitise" + ] + }, + "horse(": { + "sanitisers": [ + "japan", + "host", + "kost" + ] + }, + "valmue": {} + } +} diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index c99e137f..390a9214 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -11,10 +11,14 @@ ) from pyt.vulnerabilities import ( find_vulnerabilities, - trigger_definitions_parser, UImode, vulnerabilities ) +from pyt.vulnerabilities.trigger_definitions_parser import ( + parse, + Sink, + Source, +) from pyt.web_frameworks import ( FrameworkAdaptor, is_django_view_function, @@ -25,7 +29,7 @@ class EngineTest(VulnerabilitiesBaseTestCase): def test_parse(self): - definitions = trigger_definitions_parser.parse( + definitions = parse( trigger_word_file=os.path.join( os.getcwd(), 'pyt', @@ -36,44 +40,31 @@ def test_parse(self): self.assert_length(definitions.sources, expected_length=1) self.assert_length(definitions.sinks, expected_length=3) - self.assert_length(definitions.sinks[0][1], expected_length=1) - self.assert_length(definitions.sinks[1][1], expected_length=3) - - def test_parse_section(self): - list_ = list(trigger_definitions_parser.parse_section(iter(['get']))) - self.assert_length(list_, expected_length=1) - self.assertEqual(list_[0][0], 'get') - 
self.assertEqual(list_[0][1], list()) - - list_ = list(trigger_definitions_parser.parse_section(iter(['get', 'get -> a, b, c d s aq a']))) - self.assert_length(list_, expected_length=2) - self.assertEqual(list_[0][0], 'get') - self.assertEqual(list_[1][0], 'get') - self.assertEqual(list_[1][1], ['a', 'b', 'c d s aq a']) - self.assert_length(list_[1][1], expected_length=3) + self.assert_length(definitions.sinks[0].sanitisers, expected_length=1) + self.assert_length(definitions.sinks[1].sanitisers, expected_length=3) def test_label_contains(self): cfg_node = Node('label', None, line_number=None, path=None) - trigger_words = [('get', [])] + trigger_words = [Source('get')] list_ = list(vulnerabilities.label_contains(cfg_node, trigger_words)) self.assert_length(list_, expected_length=0) cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) - trigger_words = [('get', []), ('request', [])] + trigger_words = [Sink('request'), Source('get')] list_ = list(vulnerabilities.label_contains(cfg_node, trigger_words)) self.assert_length(list_, expected_length=2) trigger_node_1 = list_[0] trigger_node_2 = list_[1] - self.assertEqual(trigger_node_1.trigger_word, 'get') + self.assertEqual(trigger_node_1.trigger_word, 'request') self.assertEqual(trigger_node_1.cfg_node, cfg_node) - self.assertEqual(trigger_node_2.trigger_word, 'request') + self.assertEqual(trigger_node_2.trigger_word, 'get') self.assertEqual(trigger_node_2.cfg_node, cfg_node) cfg_node = Node('request.get("stefan")', None, line_number=None, path=None) - trigger_words = [('get', []), ('get', [])] + trigger_words = [Source('get'), Source('get'), Sink('get(')] list_ = list(vulnerabilities.label_contains(cfg_node, trigger_words)) - self.assert_length(list_, expected_length=2) + self.assert_length(list_, expected_length=3) def test_find_triggers(self): self.cfg_create_from_file('examples/vulnerable_code/XSS.py') @@ -83,7 +74,7 @@ def test_find_triggers(self): FrameworkAdaptor(cfg_list, [], [], 
is_flask_route_function) XSS1 = cfg_list[1] - trigger_words = [('get', [])] + trigger_words = [Source('get')] list_ = vulnerabilities.find_triggers( XSS1.nodes, @@ -110,7 +101,8 @@ def test_build_sanitiser_node_dict(self): cfg = cfg_list[1] cfg_node = Node(None, None, line_number=None, path=None) - sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node)] + sink = Sink.from_json('replace', {'sanitisers': ['escape']}) + sinks_in_file = [vulnerabilities.TriggerNode(sink, cfg_node)] sanitiser_dict = vulnerabilities.build_sanitiser_node_dict(cfg, sinks_in_file) self.assert_length(sanitiser_dict, expected_length=1) From b6376dac6d1534e395e515f1ce385c3bb28a667d Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 18 Jul 2018 15:32:19 +0100 Subject: [PATCH 197/291] Add request.get_json() to flask sources of taint --- pyt/vulnerability_definitions/all_trigger_words.pyt | 1 + pyt/vulnerability_definitions/flask_trigger_words.pyt | 1 + 2 files changed, 2 insertions(+) diff --git a/pyt/vulnerability_definitions/all_trigger_words.pyt b/pyt/vulnerability_definitions/all_trigger_words.pyt index 839ad6a4..5642db5c 100644 --- a/pyt/vulnerability_definitions/all_trigger_words.pyt +++ b/pyt/vulnerability_definitions/all_trigger_words.pyt @@ -1,6 +1,7 @@ { "sources": [ "request.args.get(", + "request.get_json(", "Markup(", "POST.get(", "GET.get(", diff --git a/pyt/vulnerability_definitions/flask_trigger_words.pyt b/pyt/vulnerability_definitions/flask_trigger_words.pyt index 0dd49ebf..db995849 100644 --- a/pyt/vulnerability_definitions/flask_trigger_words.pyt +++ b/pyt/vulnerability_definitions/flask_trigger_words.pyt @@ -1,6 +1,7 @@ { "sources": [ "request.args.get(", + "request.get_json(", ".data", "form[", "form(", From d68554cc13a4de71e64e7b2c84a20d096214ac57 Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 19 Jul 2018 16:06:03 +0100 Subject: [PATCH 198/291] Certain args, kwargs of sink functions are affected by taint The aim of this is to reduce the number of 
false positives. In the json trigger file you can specify a list of arg positions or keywords which propagate or ignore taint. So e.g there may be an execute function where the first argument is raw SQL query text which is affected by taint, but any other arguments are variables passed in to the prepared SQL statement and (we assume) can be tainted user input without a problem. To do this we can no longer just use the RHSVisitor to see which variables are used by a sink. We need the results of each arg / kwarg separately. There is the added complication of *args and **kwargs where we need to determine if a RHS variable could pass into a propagating arg / kwarg or not. Probably the best way to understand it is to look at the added test cases. --- examples/vulnerable_code/sql/sqli.py | 8 +++ pyt/cfg/stmt_visitor.py | 1 + pyt/core/node_types.py | 4 +- pyt/helper_visitors/__init__.py | 2 + pyt/helper_visitors/call_visitor.py | 71 +++++++++++++++++++ .../right_hand_side_visitor.py | 6 ++ .../trigger_definitions_parser.py | 25 +++++++ pyt/vulnerabilities/vulnerabilities.py | 40 ++++++++++- .../test_positions.pyt | 28 ++++++++ tests/base_test_case.py | 18 ++++- tests/helper_visitors/call_visitor_test.py | 52 ++++++++++++++ tests/vulnerabilities/vulnerabilities_test.py | 61 +++++++++++++++- 12 files changed, 309 insertions(+), 7 deletions(-) create mode 100644 pyt/helper_visitors/call_visitor.py create mode 100644 pyt/vulnerability_definitions/test_positions.pyt create mode 100644 tests/helper_visitors/call_visitor_test.py diff --git a/examples/vulnerable_code/sql/sqli.py b/examples/vulnerable_code/sql/sqli.py index 7435ae67..a0c589d4 100644 --- a/examples/vulnerable_code/sql/sqli.py +++ b/examples/vulnerable_code/sql/sqli.py @@ -38,5 +38,13 @@ def filtering(): print(value.username, value.email) return 'Result is displayed in console.' 
+@app.route('/users/', methods=['DELETE']) +def delete_user_dangerously(name): + query = "DELETE FROM user WHERE username = :name" + db.engine.execute(query, name=name) + print('Deleted') + return 'Deleted' + + if __name__ == '__main__': app.run(debug=True) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 68cb5587..ba463c32 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -573,6 +573,7 @@ def add_blackbox_or_builtin_call(self, node, blackbox): call_node = BBorBInode( label='', left_hand_side=LHS, + ast_node=node, right_hand_side_variables=[], line_number=node.lineno, path=self.filenames[-1], diff --git a/pyt/core/node_types.py b/pyt/core/node_types.py index 6cc2f1eb..5981c5e8 100644 --- a/pyt/core/node_types.py +++ b/pyt/core/node_types.py @@ -202,7 +202,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num class BBorBInode(AssignmentNode): """Node used for handling restore nodes returning from blackbox or builtin function calls.""" - def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_number, path, func_name): + def __init__(self, label, left_hand_side, ast_node, right_hand_side_variables, *, line_number, path, func_name): """Create a Restore node. Args: @@ -213,7 +213,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num path(string): Current filename. 
func_name(string): The string we will compare with the blackbox_mapping in vulnerabilities.py """ - super().__init__(label, left_hand_side, None, right_hand_side_variables, line_number=line_number, path=path) + super().__init__(label, left_hand_side, ast_node, right_hand_side_variables, line_number=line_number, path=path) self.args = list() self.inner_most_call = self self.func_name = func_name diff --git a/pyt/helper_visitors/__init__.py b/pyt/helper_visitors/__init__.py index ffd5f878..47cbcea1 100644 --- a/pyt/helper_visitors/__init__.py +++ b/pyt/helper_visitors/__init__.py @@ -1,9 +1,11 @@ +from .call_visitor import CallVisitor from .label_visitor import LabelVisitor from .right_hand_side_visitor import RHSVisitor from .vars_visitor import VarsVisitor __all__ = [ + 'CallVisitor', 'LabelVisitor', 'RHSVisitor', 'VarsVisitor' diff --git a/pyt/helper_visitors/call_visitor.py b/pyt/helper_visitors/call_visitor.py new file mode 100644 index 00000000..9b0d7b67 --- /dev/null +++ b/pyt/helper_visitors/call_visitor.py @@ -0,0 +1,71 @@ +import ast +import re +from collections import defaultdict, namedtuple +from itertools import count + +from ..core.ast_helper import get_call_names_as_string +from .right_hand_side_visitor import RHSVisitor + + +class CallVisitorResults( + namedtuple( + "CallVisitorResults", + ("args", "kwargs", "unknown_args", "unknown_kwargs") + ) +): + __slots__ = () + + def all_results(self): + for x in self.args: + yield from x + for x in self.kwargs.values(): + yield from x + yield from self.unknown_args + yield from self.unknown_kwargs + + +class CallVisitor(ast.NodeVisitor): + def __init__(self, trigger_str): + self.unknown_arg_visitor = RHSVisitor() + self.unknown_kwarg_visitor = RHSVisitor() + self.argument_visitors = defaultdict(lambda: RHSVisitor()) + self._trigger_str = trigger_str + + def visit_Call(self, call_node): + func_name = get_call_names_as_string(call_node.func) + trigger_re = r"(^|\.){}$".format(re.escape(self._trigger_str)) + if 
re.search(trigger_re, func_name): + seen_starred = False + for index, arg in enumerate(call_node.args): + if isinstance(arg, ast.Starred): + seen_starred = True + if seen_starred: + self.unknown_arg_visitor.visit(arg) + else: + self.argument_visitors[index].visit(arg) + + for keyword in call_node.keywords: + if keyword.arg is None: + self.unknown_kwarg_visitor.visit(keyword.value) + else: + self.argument_visitors[keyword.arg].visit(keyword.value) + self.generic_visit(call_node) + + @classmethod + def get_call_visit_results(cls, trigger_str, node): + visitor = cls(trigger_str) + visitor.visit(node) + + arg_results = [] + for i in count(): + try: + arg_results.append(set(visitor.argument_visitors.pop(i).result)) + except KeyError: + break + + return CallVisitorResults( + arg_results, + {k: set(v.result) for k, v in visitor.argument_visitors.items()}, + set(visitor.unknown_arg_visitor.result), + set(visitor.unknown_kwarg_visitor.result), + ) diff --git a/pyt/helper_visitors/right_hand_side_visitor.py b/pyt/helper_visitors/right_hand_side_visitor.py index 8dcf9ea4..629a94bb 100644 --- a/pyt/helper_visitors/right_hand_side_visitor.py +++ b/pyt/helper_visitors/right_hand_side_visitor.py @@ -21,3 +21,9 @@ def visit_Call(self, node): if node.keywords: for keyword in node.keywords: self.visit(keyword) + + @classmethod + def result_for_node(cls, node): + visitor = cls() + visitor.visit(node) + return visitor.result diff --git a/pyt/vulnerabilities/trigger_definitions_parser.py b/pyt/vulnerabilities/trigger_definitions_parser.py index b188e280..ab737928 100644 --- a/pyt/vulnerabilities/trigger_definitions_parser.py +++ b/pyt/vulnerabilities/trigger_definitions_parser.py @@ -16,10 +16,35 @@ class Sink: def __init__( self, trigger, *, + unlisted_args_propagate=True, unlisted_kwargs_propagate=True, + arg_list=None, kwarg_list=None, sanitisers=None ): self._trigger = trigger self.sanitisers = sanitisers or [] + self.arg_list_propagates = not unlisted_args_propagate + 
self.kwarg_list_propagates = not unlisted_kwargs_propagate + + if trigger[-1] != '(': + if self.arg_list_propagates or self.kwarg_list_propagates or arg_list or kwarg_list: + raise ValueError("Propagation options specified, but trigger word isn't a function call") + + self.arg_list = set(arg_list or ()) + self.kwarg_list = set(kwarg_list or ()) + + def arg_propagates(self, index): + in_list = index in self.arg_list + return self.arg_list_propagates == in_list + + def kwarg_propagates(self, keyword): + in_list = keyword in self.kwarg_list + return self.kwarg_list_propagates == in_list + + @property + def all_arguments_propagate_taint(self): + if self.arg_list or self.kwarg_list: + return False + return True @property def call(self): diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 64b11892..7fd14cd7 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -13,6 +13,7 @@ TaintedNode ) from ..helper_visitors import ( + CallVisitor, RHSVisitor, VarsVisitor ) @@ -240,6 +241,37 @@ def get_sink_args(cfg_node): return vv.result +def get_sink_args_which_propagate(sink, ast_node): + sink_args_with_positions = CallVisitor.get_call_visit_results(sink.trigger.call, ast_node) + sink_args = [] + + for i, vars in enumerate(sink_args_with_positions.args): + if sink.trigger.arg_propagates(i): + sink_args.extend(vars) + + if ( + # Either any unspecified arg propagates + not sink.trigger.arg_list_propagates or + # or there are some propagating args which weren't passed positionally + any(1 for position in sink.trigger.arg_list if position >= len(sink_args_with_positions.args)) + ): + sink_args.extend(sink_args_with_positions.unknown_args) + + for keyword, vars in sink_args_with_positions.kwargs.items(): + if sink.trigger.kwarg_propagates(keyword): + sink_args.extend(vars) + + if ( + # Either any unspecified kwarg propagates + not sink.trigger.kwarg_list_propagates or + # or there are some 
propagating kwargs which have not been passed by keyword + sink.trigger.kwarg_list - set(sink_args_with_positions.kwargs.keys()) + ): + sink_args.extend(sink_args_with_positions.unknown_kwargs) + + return sink_args + + def get_vulnerability_chains( current_node, sink, @@ -374,10 +406,14 @@ def get_vulnerability( sink.cfg_node)] nodes_in_constaint.append(source.cfg_node) - sink_args = get_sink_args(sink.cfg_node) + if sink.trigger.all_arguments_propagate_taint: + sink_args = get_sink_args(sink.cfg_node) + else: + sink_args = get_sink_args_which_propagate(sink, sink.cfg_node.ast_node) + tainted_node_in_sink_arg = get_tainted_node_in_sink_args( sink_args, - nodes_in_constaint + nodes_in_constaint, ) if tainted_node_in_sink_arg: diff --git a/pyt/vulnerability_definitions/test_positions.pyt b/pyt/vulnerability_definitions/test_positions.pyt new file mode 100644 index 00000000..48e276fe --- /dev/null +++ b/pyt/vulnerability_definitions/test_positions.pyt @@ -0,0 +1,28 @@ +{ + "sources": [ + "request.args.get(", + "make_taint(" + ], + "sinks": { + "normal(": {}, + "execute(": { + "unlisted_args_propagate": false, + "arg_list": [ + 0 + ], + "unlisted_kwargs_propagate": false, + "kwarg_list": [ + "text" + ] + }, + "run(": { + "kwarg_list": [ + "non_propagating" + ], + "arg_list": [ + 2, + 3 + ] + } + } +} diff --git a/tests/base_test_case.py b/tests/base_test_case.py index 21b7c695..1283bf47 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -9,9 +9,9 @@ class BaseTestCase(unittest.TestCase): """A base class that has helper methods for testing PyT.""" - def assert_length(self, _list, *, expected_length): + def assert_length(self, _list, *, expected_length, msg=None): actual_length = len(_list) - self.assertEqual(expected_length, actual_length) + self.assertEqual(expected_length, actual_length, msg=msg) def cfg_create_from_file( self, @@ -27,3 +27,17 @@ def cfg_create_from_file( local_modules, filename ) + + def cfg_create_from_ast( + self, + ast_tree, + 
project_modules=list(), + local_modules=list() + ): + project_definitions.clear() + self.cfg = make_cfg( + ast_tree, + project_modules, + local_modules, + filename='?' + ) diff --git a/tests/helper_visitors/call_visitor_test.py b/tests/helper_visitors/call_visitor_test.py new file mode 100644 index 00000000..304f5976 --- /dev/null +++ b/tests/helper_visitors/call_visitor_test.py @@ -0,0 +1,52 @@ +import ast +import unittest + +from pyt.helper_visitors import CallVisitor + + +class CallVisitorTest(unittest.TestCase): + def get_results(self, call_name, expr): + tree = ast.parse(expr) + return CallVisitor.get_call_visit_results(trigger_str=call_name, node=tree) + + def test_basic(self): + call_args = self.get_results('func', 'func(a, b, x=c)') + self.assertEqual(call_args.args, [{'a'}, {'b'}]) + self.assertEqual(call_args.kwargs, {'x': {'c'}}) + self.assertEqual(call_args.unknown_args, set()) + self.assertEqual(call_args.unknown_kwargs, set()) + + def test_visits_each_argument_recursively(self): + call_args = self.get_results('func', 'func(a + b, f(123), g(h(c=d)), e=i(123))') + self.assertEqual(call_args.args, [{'a', 'b'}, set(), {'d'}]) + self.assertEqual(call_args.kwargs, {'e': set()}) + self.assertEqual(call_args.unknown_args, set()) + self.assertEqual(call_args.unknown_kwargs, set()) + + def test_merge_when_function_called_inside_own_arguments(self): + call_args = self.get_results('func', 'func(a + func(b, c, x=d), e)') + self.assertEqual(call_args.args, [{'a', 'b', 'c', 'd'}, {'c', 'e'}]) + self.assertEqual(call_args.kwargs, {'x': {'d'}}) + self.assertEqual(call_args.unknown_args, set()) + self.assertEqual(call_args.unknown_kwargs, set()) + + def test_star_args_kwargs(self): + call_args = self.get_results('func', 'func(a, b, *c, *d, x=e, **f, **g)') + self.assertEqual(call_args.args, [{'a'}, {'b'}]) + self.assertEqual(call_args.kwargs, {'x': {'e'}}) + self.assertEqual(call_args.unknown_args, {'c', 'd'}) + self.assertEqual(call_args.unknown_kwargs, {'f', 'g'}) + 
+ def test_call_inside_comprehension(self): + call_args = self.get_results('func', '[row for row in db.func(a, b)]') + self.assertEqual(call_args.args, [{'a'}, {'b'}]) + self.assertEqual(call_args.kwargs, {}) + self.assertEqual(call_args.unknown_args, set()) + self.assertEqual(call_args.unknown_kwargs, set()) + + def test_call_inside_comprehension_2(self): + call_args = self.get_results('func', '[func(a, b) for b in c]') + self.assertEqual(call_args.args, [{'a'}, {'b'}]) + self.assertEqual(call_args.kwargs, {}) + self.assertEqual(call_args.unknown_args, set()) + self.assertEqual(call_args.unknown_kwargs, set()) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 390a9214..20d142ba 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -1,3 +1,4 @@ +import ast import os from .vulnerabilities_base_test_case import VulnerabilitiesBaseTestCase @@ -308,7 +309,7 @@ def test_path_traversal_sanitised_2_result(self): def test_sql_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/sql/sqli.py') - self.assert_length(vulnerabilities, expected_length=1) + self.assert_length(vulnerabilities, expected_length=2) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/sql/sqli.py @@ -560,3 +561,61 @@ def run_analysis(self, path): def test_self_is_not_tainted(self): vulnerabilities = self.run_analysis('examples/example_inputs/def_with_self_as_first_arg.py') self.assert_length(vulnerabilities, expected_length=0) + + +class EnginePositionTest(VulnerabilitiesBaseTestCase): + def run_analysis(self): + cfg_list = [self.cfg] + + FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) + initialize_constraint_table(cfg_list) + + analyse(cfg_list) + + trigger_word_file = os.path.join( + 'pyt', + 'vulnerability_definitions', + 'test_positions.pyt' + ) + + return 
find_vulnerabilities( + cfg_list, + UImode.NORMAL, + default_blackbox_mapping_file, + trigger_word_file + ) + + def test_sql_result_ignores_false_positive_prepared_statement(self): + self.cfg_create_from_file('examples/vulnerable_code/sql/sqli.py') + vulnerabilities = self.run_analysis() + self.assert_length(vulnerabilities, expected_length=1) + self.assertEqual(vulnerabilities[0].source.line_number, 26) + + def test_args_kwargs_that_do_dont_propagate(self): + def check(fixture, vulnerable): + tree = ast.parse('TAINT = make_taint()\n' + fixture) + self.cfg_create_from_ast(tree) + vulnerabilities = self.run_analysis() + self.assert_length(vulnerabilities, expected_length=1 if vulnerable else 0, msg=fixture) + + no_vuln_fixtures = ( + 'execute(0)', + 'run(0, x, TAINT, 0, x=x)', + 'run(x, 0, non_propagating=TAINT)', + 'execute(x, name=TAINT)', + 'execute(x, *TAINT)', + 'execute(text=x, **TAINT)', + 'dont_run(TAINT)', + ) + vuln_fixtures = ( + 'run(TAINT)', + 'subprocess.run(TAINT)', + 'run(0, TAINT, 0, x=0)', + 'run(0, x, non_propagating=x, tainted=TAINT)', + 'execute(*ok, *TAINT)', + 'execute(name=x, **TAINT)', + ) + for fixture_str in no_vuln_fixtures: + check(fixture_str, False) + for fixture_str in vuln_fixtures: + check(fixture_str, True) From 490128cfb041812d15464fbbe06983c4f31b624c Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 19:31:38 +0100 Subject: [PATCH 199/291] Handle list assignment like tuples [a, b] = (c, d) --- pyt/cfg/stmt_visitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index ba463c32..d99c9ae8 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -380,8 +380,8 @@ def assign_multi_target(self, node, right_hand_side_variables): def visit_Assign(self, node): rhs_visitor = RHSVisitor() rhs_visitor.visit(node.value) - if isinstance(node.targets[0], ast.Tuple): # x,y = [1,2] - if isinstance(node.value, ast.Tuple): + if 
isinstance(node.targets[0], (ast.Tuple, ast.List)): # x,y = [1,2] + if isinstance(node.value, (ast.Tuple, ast.List)): return self.assign_tuple_target(node, rhs_visitor.result) elif isinstance(node.value, ast.Call): call = None From 2b43ae094dd07349795c12825598e5a10dfcaf9d Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 18:21:03 +0100 Subject: [PATCH 200/291] Print asterisk before a Starred in LabelVisitor --- pyt/helper_visitors/label_visitor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyt/helper_visitors/label_visitor.py b/pyt/helper_visitors/label_visitor.py index f594bff8..3be85ba4 100644 --- a/pyt/helper_visitors/label_visitor.py +++ b/pyt/helper_visitors/label_visitor.py @@ -320,3 +320,7 @@ def visit_FormattedValue(self, node): self.result += ':' self.visit_joined_str(node.format_spec) self.result += '}' + + def visit_Starred(self, node): + self.result += '*' + self.visit(node.value) From d2566d2a4fe5fbd5822f6567bf6d3218e5c5125d Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 12 Jul 2018 19:31:56 +0100 Subject: [PATCH 201/291] Test Starred in label visitor --- tests/helper_visitors/label_visitor_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/helper_visitors/label_visitor_test.py b/tests/helper_visitors/label_visitor_test.py index e39f5151..2a7f0857 100644 --- a/tests/helper_visitors/label_visitor_test.py +++ b/tests/helper_visitors/label_visitor_test.py @@ -79,3 +79,7 @@ def test_joined_str(self): def test_joined_str_with_format_spec(self): label = self.perform_labeling_on_expression('f"a{b!s:.{length}}"') self.assertEqual(label.result, 'f\'a{b!s:.{length}}\'') + + def test_starred(self): + label = self.perform_labeling_on_expression('[a, *b] = *c, d') + self.assertEqual(label.result, '[a, *b] = (*c, d)') From 5de325ed23df308266748728a8b5c12ae5ef44ec Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 23 Jul 2018 17:13:47 +0100 Subject: [PATCH 202/291] Add __init__.py to modules as a package import from 
test_project.folder import ... should import module in test_project/folder/__init__.py and all the modules within the folder for working out if taint propagates. --- examples/test_project/folder/__init__.py | 0 pyt/core/project_handler.py | 19 +++++++++++-------- tests/core/project_handler_test.py | 8 ++++++-- 3 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 examples/test_project/folder/__init__.py diff --git a/examples/test_project/folder/__init__.py b/examples/test_project/folder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyt/core/project_handler.py b/pyt/core/project_handler.py index 48eccfd1..0136010d 100644 --- a/pyt/core/project_handler.py +++ b/pyt/core/project_handler.py @@ -52,14 +52,10 @@ def get_modules(path): '.' ) directory = directory.replace('.', '', 1) - if directory: - modules.append( - ('.'.join((module_root, directory, filename.replace('.py', ''))), os.path.join(root, filename)) - ) - else: - modules.append( - ('.'.join((module_root, filename.replace('.py', ''))), os.path.join(root, filename)) - ) + modules.append(( + '.'.join(p for p in (module_root, directory, _filename_to_module(filename)) if p), + os.path.join(root, filename) + )) return modules @@ -68,3 +64,10 @@ def _is_python_file(path): if os.path.splitext(path)[1] == '.py': return True return False + + +def _filename_to_module(filename): + if filename == '__init__.py': + return '' + else: + return os.path.splitext(filename)[0] diff --git a/tests/core/project_handler_test.py b/tests/core/project_handler_test.py index 4de34b21..ddc812c7 100644 --- a/tests/core/project_handler_test.py +++ b/tests/core/project_handler_test.py @@ -32,6 +32,7 @@ def test_get_modules(self): utils_path = os.path.join(project_folder, 'utils.py') exceptions_path = os.path.join(project_folder, 'exceptions.py') some_path = os.path.join(project_folder, folder, 'some.py') + __init__path = os.path.join(project_folder, folder, '__init__.py') indhold_path = 
os.path.join(project_folder, folder, directory, 'indhold.py') # relative_folder_name = '.' + folder @@ -39,21 +40,24 @@ def test_get_modules(self): utils_name = project_namespace + '.' + 'utils' exceptions_name = project_namespace + '.' + 'exceptions' some_name = project_namespace + '.' + folder + '.some' + __init__name = project_namespace + '.' + folder indhold_name = project_namespace + '.' + folder + '.' + directory + '.indhold' app_tuple = (app_name, app_path) utils_tuple = (utils_name, utils_path) exceptions_tuple = (exceptions_name, exceptions_path) some_tuple = (some_name, some_path) + __init__tuple = (__init__name, __init__path) indhold_tuple = (indhold_name, indhold_path) self.assertIn(app_tuple, modules) self.assertIn(utils_tuple, modules) self.assertIn(exceptions_tuple, modules) self.assertIn(some_tuple, modules) + self.assertIn(__init__tuple, modules) self.assertIn(indhold_tuple, modules) - self.assertEqual(len(modules), 5) + self.assertEqual(len(modules), 6) def test_get_modules_and_packages(self): project_folder = os.path.normpath(os.path.join('examples', 'test_project')) @@ -104,4 +108,4 @@ def test_get_modules_and_packages(self): self.assertIn(some_tuple, modules) self.assertIn(indhold_tuple, modules) - self.assertEqual(len(modules), 7) + self.assertEqual(len(modules), 8) From e2ad3b03da8132c49e4d3c75d0301a10c839c5dc Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 23 Jul 2018 17:09:27 +0100 Subject: [PATCH 203/291] Imports: --dont-prepend-root flag For a project with root in /app, currently pyt expects that all of the imports are of the form: from app.folder.module import thing But actually a project could not be expecting the module root to be prepended. My projects use: from folder.module import thing This adds an optional boolean flag to change the behaviour of get_modules. 
--- pyt/__main__.py | 2 +- pyt/core/project_handler.py | 27 +++++++++++++++------------ pyt/usage.py | 7 +++++++ tests/core/project_handler_test.py | 28 +++++++++++++++++++++++++++- tests/usage_test.py | 8 ++++++-- 5 files changed, 56 insertions(+), 16 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index a979eb6e..1d76828e 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -86,7 +86,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 directory = os.path.normpath(args.project_root) else: directory = os.path.dirname(path) - project_modules = get_modules(directory) + project_modules = get_modules(directory, prepend_module_root=args.prepend_module_root) local_modules = get_directory_modules(directory) tree = generate_ast(path) diff --git a/pyt/core/project_handler.py b/pyt/core/project_handler.py index 0136010d..f6a30f0e 100644 --- a/pyt/core/project_handler.py +++ b/pyt/core/project_handler.py @@ -31,7 +31,7 @@ def get_directory_modules(directory): return _local_modules -def get_modules(path): +def get_modules(path, prepend_module_root=True): """Return a list containing tuples of e.g. ('test_project.utils', 'example/test_project/utils.py') """ @@ -52,10 +52,20 @@ def get_modules(path): '.' 
) directory = directory.replace('.', '', 1) - modules.append(( - '.'.join(p for p in (module_root, directory, _filename_to_module(filename)) if p), - os.path.join(root, filename) - )) + + module_name_parts = [] + if prepend_module_root: + module_name_parts.append(module_root) + if directory: + module_name_parts.append(directory) + + if filename == '__init__.py': + path = root + else: + module_name_parts.append(os.path.splitext(filename)[0]) + path = os.path.join(root, filename) + + modules.append(('.'.join(module_name_parts), path)) return modules @@ -64,10 +74,3 @@ def _is_python_file(path): if os.path.splitext(path)[1] == '.py': return True return False - - -def _filename_to_module(filename): - if filename == '__init__.py': - return '' - else: - return os.path.splitext(filename)[0] diff --git a/pyt/usage.py b/pyt/usage.py index 30286215..82d15027 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -101,6 +101,13 @@ def _add_optional_group(parser): default='', help='Separate files with commas' ) + optional_group.add_argument( + '--dont-prepend-root', + help="In project root e.g. /app, imports are not prepended with app.*", + action='http://github.com/store_false', + default=True, + dest='prepend_module_root' + ) def _add_print_group(parser): diff --git a/tests/core/project_handler_test.py b/tests/core/project_handler_test.py index ddc812c7..04e84be2 100644 --- a/tests/core/project_handler_test.py +++ b/tests/core/project_handler_test.py @@ -32,7 +32,7 @@ def test_get_modules(self): utils_path = os.path.join(project_folder, 'utils.py') exceptions_path = os.path.join(project_folder, 'exceptions.py') some_path = os.path.join(project_folder, folder, 'some.py') - __init__path = os.path.join(project_folder, folder, '__init__.py') + __init__path = os.path.join(project_folder, folder) indhold_path = os.path.join(project_folder, folder, directory, 'indhold.py') # relative_folder_name = '.' 
+ folder @@ -59,6 +59,32 @@ def test_get_modules(self): self.assertEqual(len(modules), 6) + def test_get_modules_no_prepend_root(self): + project_folder = os.path.normpath(os.path.join('examples', 'test_project')) + + folder = 'folder' + directory = 'directory' + + modules = get_modules(project_folder, prepend_module_root=False) + + app_path = os.path.join(project_folder, 'app.py') + __init__path = os.path.join(project_folder, folder) + indhold_path = os.path.join(project_folder, folder, directory, 'indhold.py') + + app_name = 'app' + __init__name = folder + indhold_name = folder + '.' + directory + '.indhold' + + app_tuple = (app_name, app_path) + __init__tuple = (__init__name, __init__path) + indhold_tuple = (indhold_name, indhold_path) + + self.assertIn(app_tuple, modules) + self.assertIn(__init__tuple, modules) + self.assertIn(indhold_tuple, modules) + + self.assertEqual(len(modules), 6) + def test_get_modules_and_packages(self): project_folder = os.path.normpath(os.path.join('examples', 'test_project')) diff --git a/tests/usage_test.py b/tests/usage_test.py index d9ed7cec..a5e7db4b 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -28,7 +28,8 @@ def test_no_args(self): EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [-trim] [-i] + [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] [-trim] + [-i] targets [targets ...] required arguments: @@ -55,6 +56,8 @@ def test_no_args(self): -r, --recursive find and process files in subdirectories -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS Separate files with commas + --dont-prepend-root In project root e.g. 
/app, imports are not prepended + with app.* print arguments: -trim, --trim-reassigned-in @@ -73,7 +76,8 @@ def test_valid_args_but_no_targets(self): EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [-trim] [-i] + [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] [-trim] + [-i] targets [targets ...] python -m pyt: error: the following arguments are required: targets\n""" From 11bcd2d93f45c28c89042f3ae82e212585d598f5 Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 23 Jul 2018 17:44:30 +0100 Subject: [PATCH 204/291] Remove unused function: valid_date --- .coveragerc | 1 - pyt/usage.py | 9 --------- 2 files changed, 10 deletions(-) diff --git a/.coveragerc b/.coveragerc index c7e7a385..df3137cf 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,7 +2,6 @@ show_missing = True exclude_lines = - def valid_date def __repr__ def __str__ if __name__ == .__main__.: diff --git a/pyt/usage.py b/pyt/usage.py index 82d15027..6a49bb3c 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -18,15 +18,6 @@ ) -def valid_date(s): - date_format = "%Y-%m-%d" - try: - return datetime.strptime(s, date_format).date() - except ValueError: - msg = "Not a valid date: '{0}'. Format: {1}".format(s, date_format) - raise argparse.ArgumentTypeError(msg) - - def _add_required_group(parser): required_group = parser.add_argument_group('required arguments') required_group.add_argument( From 11bb85b90fc1376048145055c78b1f3eb93a8c1b Mon Sep 17 00:00:00 2001 From: bcaller Date: Tue, 24 Jul 2018 14:01:47 +0100 Subject: [PATCH 205/291] Imports: --no-local-imports flag I had a problem where one of my folders had a file called flask.py. Because of this, any imports of the package flask were causing pyt to import from the local file flask.py. In my project this caused a circular import and RecursionError which crashed pyt. 
Adds a flag so that imports relative to the project root still work: from some.directory.flask import ... but from flask import ... will now only import from project root or treat as an IgnoredNode(). Relative imports: from .flask import ... are not affected and will still work. --- pyt/__main__.py | 3 ++- pyt/cfg/expr_visitor.py | 6 ++++-- pyt/cfg/make_cfg.py | 6 ++++-- pyt/cfg/stmt_visitor.py | 5 ++++- pyt/usage.py | 8 ++++++++ tests/usage_test.py | 11 +++++++---- 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 1d76828e..5eed4747 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -94,7 +94,8 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 tree, project_modules, local_modules, - path + path, + allow_local_directory_imports=args.allow_local_imports ) cfg_list = [cfg] diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index b4a96168..f4d99d6a 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -37,11 +37,13 @@ def __init__( project_modules, local_modules, filename, - module_definitions=None + module_definitions=None, + allow_local_directory_imports=True ): """Create an empty CFG.""" + super().__init__(allow_local_directory_imports=allow_local_directory_imports) self.project_modules = project_modules - self.local_modules = local_modules + self.local_modules = local_modules if self._allow_local_modules else [] self.filenames = [filename] self.blackbox_assignments = set() self.nodes = list() diff --git a/pyt/cfg/make_cfg.py b/pyt/cfg/make_cfg.py index a60b734e..65aee3a8 100644 --- a/pyt/cfg/make_cfg.py +++ b/pyt/cfg/make_cfg.py @@ -30,14 +30,16 @@ def make_cfg( project_modules, local_modules, filename, - module_definitions=None + module_definitions=None, + allow_local_directory_imports=True ): visitor = ExprVisitor( tree, project_modules, local_modules, filename, - module_definitions + module_definitions, + allow_local_directory_imports ) return CFG( visitor.nodes, diff 
--git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index ba463c32..7bbafb09 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -53,6 +53,9 @@ class StmtVisitor(ast.NodeVisitor): + def __init__(self, allow_local_directory_imports=True): + self._allow_local_modules = allow_local_directory_imports + super().__init__() def visit_Module(self, node): return self.stmt_star_handler(node.body) @@ -753,7 +756,7 @@ def add_module( # noqa: C901 # Analyse the file self.filenames.append(module_path) - self.local_modules = get_directory_modules(module_path) + self.local_modules = get_directory_modules(module_path) if self._allow_local_modules else [] tree = generate_ast(module_path) # module[0] is None during e.g. "from . import foo", so we must str() diff --git a/pyt/usage.py b/pyt/usage.py index 6a49bb3c..a4ec5d81 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -99,6 +99,14 @@ def _add_optional_group(parser): default=True, dest='prepend_module_root' ) + optional_group.add_argument( + '--no-local-imports', + help='If set, absolute imports must be relative to the project root. ' + 'If not set, modules in the same directory can be imported just by their names.', + action='store_false', + default=True, + dest='allow_local_imports' + ) def _add_print_group(parser): diff --git a/tests/usage_test.py b/tests/usage_test.py index a5e7db4b..027c6f00 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -28,8 +28,8 @@ def test_no_args(self): EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] [-trim] - [-i] + [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] + [--no-local-imports] [-trim] [-i] targets [targets ...] required arguments: @@ -58,6 +58,9 @@ def test_no_args(self): Separate files with commas --dont-prepend-root In project root e.g.
/app, imports are not prepended with app.* + --no-local-imports If set, absolute imports must be relative to the + project root. If not set, modules in the same + directory can be imported just by their names. print arguments: -trim, --trim-reassigned-in @@ -76,8 +79,8 @@ def test_valid_args_but_no_targets(self): EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] [-trim] - [-i] + [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] + [--no-local-imports] [-trim] [-i] targets [targets ...] python -m pyt: error: the following arguments are required: targets\n""" From 80113af9dd3b1ea10bfd69d2f7433f4eb98c9b54 Mon Sep 17 00:00:00 2001 From: bcaller Date: Tue, 24 Jul 2018 14:08:06 +0100 Subject: [PATCH 206/291] Handle Starred assignments where possible Try to match the targets with the values so we reduce the number of false positives. Before, all right hand side variables were tainting all of the left hand side variables. 
a, *b = _, _, TAINT a clean, b tainted a, *b, c = _, _, TAINT, TAINT, _ a clean, b tainted, c clean a, *b, c = _, *_, *TAINT, *_ a clean, b tainted, c tainted --- examples/example_inputs/assignment_starred.py | 1 + pyt/cfg/stmt_visitor.py | 49 +++++++++++++++---- pyt/cfg/stmt_visitor_helper.py | 2 + tests/cfg/cfg_test.py | 41 ++++++++++++++++ 4 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 examples/example_inputs/assignment_starred.py diff --git a/examples/example_inputs/assignment_starred.py b/examples/example_inputs/assignment_starred.py new file mode 100644 index 00000000..daeeaf3b --- /dev/null +++ b/examples/example_inputs/assignment_starred.py @@ -0,0 +1 @@ +a, *b, c, d, e = f, *g, *h, f + i, j diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index d99c9ae8..2f2a4cfd 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -327,28 +327,59 @@ def visit_Try(self, node): return ControlFlowNode(try_node, last_statements, break_statements=body.break_statements) def assign_tuple_target(self, node, right_hand_side_variables): - new_assignment_nodes = list() - for i, target in enumerate(node.targets[0].elts): - value = node.value.elts[i] + new_assignment_nodes = [] + remaining_variables = list(right_hand_side_variables) + remaining_targets = list(node.targets[0].elts) + remaining_values = list(node.value.elts) # May contain duplicates + def visit(target, value): label = LabelVisitor() label.visit(target) - + rhs_visitor = RHSVisitor() + rhs_visitor.visit(value) if isinstance(value, ast.Call): new_ast_node = ast.Assign(target, value) - new_ast_node.lineno = node.lineno - + ast.copy_location(new_ast_node, node) new_assignment_nodes.append(self.assignment_call_node(label.result, new_ast_node)) - else: label.result += ' = ' label.visit(value) - new_assignment_nodes.append(self.append_node(AssignmentNode( label.result, extract_left_hand_side(target), ast.Assign(target, value), - right_hand_side_variables, + 
rhs_visitor.result, + line_number=node.lineno, + path=self.filenames[-1] + ))) + remaining_targets.remove(target) + remaining_values.remove(value) + for var in rhs_visitor.result: + remaining_variables.remove(var) + + # Pair targets and values until a Starred node is reached + for target, value in zip(node.targets[0].elts, node.value.elts): + if isinstance(target, ast.Starred) or isinstance(value, ast.Starred): + break + visit(target, value) + + # If there was a Starred node, pair remaining targets and values from the end + for target, value in zip(reversed(list(remaining_targets)), reversed(list(remaining_values))): + if isinstance(target, ast.Starred) or isinstance(value, ast.Starred): + break + visit(target, value) + + if remaining_targets: + label = LabelVisitor() + label.handle_comma_separated(remaining_targets) + label.result += ' = ' + label.handle_comma_separated(remaining_values) + for target in remaining_targets: + new_assignment_nodes.append(self.append_node(AssignmentNode( + label.result, + extract_left_hand_side(target), + ast.Assign(target, remaining_values[0]), + remaining_variables, line_number=node.lineno, path=self.filenames[-1] ))) diff --git a/pyt/cfg/stmt_visitor_helper.py b/pyt/cfg/stmt_visitor_helper.py index 407df31f..6c49a407 100644 --- a/pyt/cfg/stmt_visitor_helper.py +++ b/pyt/cfg/stmt_visitor_helper.py @@ -79,6 +79,8 @@ def _get_names(node, result): return node.id + result elif isinstance(node, ast.Subscript): return result + elif isinstance(node, ast.Starred): + return _get_names(node.value, result) else: return _get_names(node.value, result + '.' 
+ node.attr) diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index ece76213..9bea8514 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -1,3 +1,5 @@ +import ast + from .cfg_base_test_case import CFGBaseTestCase from pyt.core.node_types import ( @@ -779,6 +781,45 @@ def test_assignment_tuple_value(self): self.assertEqual(self.cfg.nodes[node].label, 'a = (x, y)') + def test_assignment_starred(self): + self.cfg_create_from_file('examples/example_inputs/assignment_starred.py') + + middle_nodes = self.cfg.nodes[1:-1] + self.assert_length(middle_nodes, expected_length=5) + + visited = [self.cfg.nodes[0]] + while True: + current_node = visited[-1] + if len(current_node.outgoing) != 1: + break + visited.append(current_node.outgoing[0]) + self.assertCountEqual(self.cfg.nodes, visited, msg="Did not complete a path from Entry to Exit") + + self.assertEqual(middle_nodes[0].label, 'a = f') + self.assertCountEqual( # We don't assert a specific order for the assignment nodes + [n.label for n in middle_nodes], + ['a = f', 'd = f + i', 'e = j'] + ['*b, c = *g, *h'] * 2, + ) + self.assertCountEqual( + [(n.left_hand_side, n.right_hand_side_variables) for n in middle_nodes], + [('a', ['f']), ('b', ['g', 'h']), ('c', ['g', 'h']), ('d', ['f', 'i']), ('e', ['j'])], + ) + + def test_assignment_starred_list(self): + self.cfg_create_from_ast(ast.parse('[a, b, c] = *d, e')) + + middle_nodes = self.cfg.nodes[1:-1] + self.assert_length(middle_nodes, expected_length=3) + + self.assertCountEqual( + [n.label for n in middle_nodes], + ['a, b = *d', 'a, b = *d', 'c = e'], + ) + self.assertCountEqual( + [(n.left_hand_side, n.right_hand_side_variables) for n in middle_nodes], + [('a', ['d']), ('b', ['d']), ('c', ['e'])], + ) + class CFGComprehensionTest(CFGBaseTestCase): def test_nodes(self): From 5f1498e80018bcb1a884ad2b6d85690ac16a380c Mon Sep 17 00:00:00 2001 From: bcaller Date: Tue, 24 Jul 2018 15:17:55 +0100 Subject: [PATCH 207/291] Cache of AST tree by module path The 
dependency graph of a large Python app can be quite a mess. Previously, we were regenerating the AST of files many times, as they were imported by different modules. Adding a simple LRU cache [path -> AST tree] sped up pyt on one of my code bases by 5x. We currently only mutate newly created artificial nodes, so we can store one copy of each module's AST and expect it to be static. We can make it even faster by setting maxsize=None, so there is no eviction logic, but for now I think it's already an improvement. An alternative could be to rewrite the code which deals with imports. --- pyt/core/ast_helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index 2b8776a5..3d17b7f0 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -4,6 +4,7 @@ import ast import os import subprocess +from functools import lru_cache BLACK_LISTED_CALL_NAMES = ['self'] @@ -21,6 +22,7 @@ def _convert_to_3(path): # pragma: no cover exit(1) +@lru_cache() def generate_ast(path): """Generate an Abstract Syntax Tree using the ast module. From 97265549180bb9ba677c69032448243efdc34b0e Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 25 Jul 2018 10:07:25 +0100 Subject: [PATCH 208/291] Remove unused import In commit 11bcd2d Remove unused function: valid_date an unused import was left in. With the next commit, tox & travis should fail: F401 'datetime.datetime' imported but unused --- pyt/usage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyt/usage.py b/pyt/usage.py index a4ec5d81..7325a7c8 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -1,7 +1,6 @@ import argparse import os import sys -from datetime import datetime default_blackbox_mapping_file = os.path.join( From d55cbc035a72e974de4de4c656b9b3b5ca5ee529 Mon Sep 17 00:00:00 2001 From: bcaller Date: Tue, 24 Jul 2018 15:03:02 +0100 Subject: [PATCH 209/291] Tox, travis and requirements Requirements: Requirements files aren't used. 
Requirements.txt and setup.py had odd requirements that I can't see used anywhere. Requirements-dev.txt had conflicting packages (flake8 wants specific versions of pyflakes and pycodestyle). Tox & travis: Split tox into a test, coverage and lint phase. Run either: tox tox -e py36 tox -e cover tox -e lint Tox and travis will now fail the lint / build on flake8 errors to avoid non-compliant code being merged. (--exit-zero removed) Coverage will fail for now, so let's set it really low in travis for now. McCabe complexity is annoying and dealt with better by codeclimate so I bumped it up from 10 to 11 so it won't fail at the moment. --- .travis.yml | 9 ++++----- requirements-dev.txt | 8 -------- requirements.txt | 4 ---- setup.py | 6 +----- tox.ini | 20 ++++++++++++++++---- 5 files changed, 21 insertions(+), 26 deletions(-) delete mode 100644 requirements-dev.txt delete mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index 079eef72..0638ada6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,15 +2,14 @@ language: python python: - "3.6" install: - - pip install -r requirements.txt - - pip install codeclimate-test-reporter flake8 + - pip install codeclimate-test-reporter 'coverage>=4.0,<4.4' flake8 before_script: # stop the build if there are Python syntax errors or undefined names - flake8 . --count --exclude=examples --select=E901,E999,F821,F822,F823 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - - flake8 . --count --exclude=examples --exit-zero --max-complexity=10 --max-line-length=127 --statistics script: - - python -m tests - coverage run -m tests + - flake8 . 
--count --exclude=examples --max-complexity=11 --max-line-length=127 --show-source --statistics + - coverage report --include=tests/* --fail-under 100 + - coverage report --include=pyt/* --fail-under 91 after_script: - codeclimate-test-reporter diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 54e84f37..00000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -flake8 -mock -pre-commit -py -pycodestyle -pyflakes -tox -virtualenv diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index f7b38d47..00000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -coverage>=4.0, <4.4 -GitPython==2.0.8 -graphviz==0.4.10 -requests~=2.12 diff --git a/setup.py b/setup.py index 39aa9440..17c81594 100644 --- a/setup.py +++ b/setup.py @@ -30,11 +30,7 @@ 'Programming Language :: Python :: 3.6' ], keywords=['security', 'vulnerability', 'web', 'flask', 'django', 'static-analysis', 'program-analysis'], - install_requires=[ - 'graphviz>=0.4.10', - 'requests>=2.12', - 'GitPython>=2.0.8' - ], + install_requires=[], entry_points={ 'console_scripts': [ 'pyt = pyt:main' diff --git a/tox.ini b/tox.ini index aa701c72..9ea660f5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,14 +1,26 @@ [tox] -envlist = py36 +envlist = py36,cover,lint [testenv] +deps = mock +commands = + python -m tests + +[testenv:cover] whitelist_externals = coverage -deps = -rrequirements-dev.txt +deps = + coverage>=4.0,<4.4 + mock commands = coverage erase coverage run tests coverage report --include=tests/* --fail-under 100 coverage report --include=pyt/* --fail-under 91 + +[testenv:lint] +deps = + flake8 + pre-commit +commands = pre-commit run - flake8 . --count --exclude=examples,venv,.tox --select=E901,E999,F821,F822,F823 --show-source --statistics - flake8 . --count --exclude=examples,venv,.tox,dist --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 . 
--count --exclude=examples,.env,venv,.tox --show-source --statistics --max-complexity=11 --max-line-length=127 --statistics From e2841be764ff563b4ace44c88dc1ecd138862372 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 18:03:42 +0100 Subject: [PATCH 210/291] Make assertion in main test work assert_called_with does the assertion. The function has return value None. So don't assert assert_called_with. Fixes the "This with: makes no sense" comment. --- tests/main_test.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 4839da51..b87e4dbf 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -26,12 +26,10 @@ def test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args 'parse_args is mocked' ]) assert mock_text.report.call_count == 1 - # This with: makes no sense - with self.assertRaises(AssertionError): - assert mock_text.report.assert_called_with( - mock_find_vulnerabilities.return_value, - mock_parse_args.return_value.output_file - ) + mock_text.report.assert_called_with( + mock_find_vulnerabilities.return_value, + mock_parse_args.return_value.output_file + ) @mock.patch('pyt.__main__.discover_files') @mock.patch('pyt.__main__.parse_args') @@ -54,12 +52,10 @@ def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args 'parse_args is mocked' ]) assert mock_json.report.call_count == 1 - # This with: makes no sense - with self.assertRaises(AssertionError): - assert mock_json.report.assert_called_with( - mock_find_vulnerabilities.return_value, - mock_parse_args.return_value.output_file - ) + mock_json.report.assert_called_with( + mock_find_vulnerabilities.return_value, + mock_parse_args.return_value.output_file + ) class DiscoverFilesTest(BaseTestCase): From 92fe367cd4921f9af3872a69807a210fb64f0e8c Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 18:20:57 +0100 Subject: [PATCH 211/291] Exit with error code if there 
are unsanitised vulns To be useful as a linter process such as in a Continuous Integration system, pyt should finish with pass or fail exit codes. Saves having to grep the output. If there are unsanitised vulnerabilities, sys.exit(1). In the future we'll probably want a flag to not print sanitised vulns. --- pyt/__main__.py | 5 +++++ tests/main_test.py | 35 +++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 5eed4747..192eaf99 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -22,6 +22,7 @@ get_vulnerabilities_not_in_baseline, UImode ) +from .vulnerabilities.vulnerability_helper import SanitisedVulnerability from .web_frameworks import ( FrameworkAdaptor, is_django_view_function, @@ -137,6 +138,10 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 else: text.report(vulnerabilities, args.output_file) + has_unsanitized_vulnerabilities = any(not isinstance(v, SanitisedVulnerability) for v in vulnerabilities) + if has_unsanitized_vulnerabilities: + sys.exit(1) + if __name__ == '__main__': main() diff --git a/tests/main_test.py b/tests/main_test.py index b87e4dbf..b01cc8ee 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -16,15 +16,36 @@ def test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( - autospec=True, project_root=None, baseline=None, json=None, output_file=output_file ) - main([ - 'parse_args is mocked' - ]) + with self.assertRaises(SystemExit): + main(['parse_args is mocked']) + assert mock_text.report.call_count == 1 + mock_text.report.assert_called_with( + mock_find_vulnerabilities.return_value, + mock_parse_args.return_value.output_file + ) + + @mock.patch('pyt.__main__.discover_files') + @mock.patch('pyt.__main__.parse_args') + @mock.patch('pyt.__main__.find_vulnerabilities') + @mock.patch('pyt.__main__.text') + def 
test_no_vulns_found(self, mock_text, mock_find_vulnerabilities, mock_parse_args, mock_discover_files): + mock_find_vulnerabilities.return_value = [] + example_file = 'examples/vulnerable_code/inter_command_injection.py' + output_file = 'mocked_outfile' + + mock_discover_files.return_value = [example_file] + mock_parse_args.return_value = mock.Mock( + project_root=None, + baseline=None, + json=None, + output_file=output_file + ) + main(['parse_args is mocked']) # No SystemExit assert mock_text.report.call_count == 1 mock_text.report.assert_called_with( mock_find_vulnerabilities.return_value, @@ -42,15 +63,13 @@ def test_json_output(self, mock_json, mock_find_vulnerabilities, mock_parse_args mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( - autospec=True, project_root=None, baseline=None, json=True, output_file=output_file ) - main([ - 'parse_args is mocked' - ]) + with self.assertRaises(SystemExit): + main(['parse_args is mocked']) assert mock_json.report.call_count == 1 mock_json.report.assert_called_with( mock_find_vulnerabilities.return_value, From d4cbde04fc9c30c875244f17e0fe25aba48ffe17 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 18:27:39 +0100 Subject: [PATCH 212/291] Fix pyt console script Can now use just `pyt` exactly like `python -m pyt`. 
Before there was an error: AttributeError: module 'pyt' has no attribute 'main' --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 17c81594..8d139dc0 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ install_requires=[], entry_points={ 'console_scripts': [ - 'pyt = pyt:main' + 'pyt = pyt.__main__:main' ] } ) From 66f486c5a60c2580c88b1a94c9fa4fec1a0c6c92 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 28 Jul 2018 14:46:44 -0700 Subject: [PATCH 213/291] Add all CHANGELOG additions since version 0.34 --- CHANGELOG.md | 62 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 011e12d5..1f15172c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :tada: New Features #### :sparkles: Usability #### :mortar_board: Walkthrough / Help +#### :performing_arts: Performance #### :telescope: Precision #### :bug: Bugfixes #### :snake: Miscellaneous @@ -26,11 +27,42 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: #### :tada: New Features +* Ability to analyze directories, `-r` Recursive option ([#129], thanks [@omergunal]) +* Added `--dont-prepend-root` option, makes it so that we don't require imports start with `project_root.*` ([#151], thanks [@bcaller]) +* Added `--no-local-imports` option, to require absolute imports be relative to the project root ([#151], thanks [@bcaller]) +* [PEP 498] support, formatted string literals ([#142], thanks [@bcaller]) +* [PEP 526] support, syntax for variable annotations ([#143], thanks [@bcaller]) * Whitelist lines of sources and sinks ending in `# nosec` ([#121], thanks [@omergunal]) -* Ability to analyze directories, -r Recursive option ([#129], thanks [@omergunal]) +[@bcaller]: https://github.com/bcaller +[PEP 498]: https://www.python.org/dev/peps/pep-0498/ +[PEP 526]: https://www.python.org/dev/peps/pep-0526/ [#121]: https://github.com/python-security/pyt/pull/121 [#129]: https://github.com/python-security/pyt/pull/129 +[#142]: https://github.com/python-security/pyt/pull/142 +[#143]: https://github.com/python-security/pyt/pull/143 +[#151]: https://github.com/python-security/pyt/pull/151 + +#### :telescope: Precision + +* Added per-arg taint, for sink functions ([#147], thanks [@bcaller]) +* Improved tuple assingment to be more precise and support starargs ([#150], thanks [@bcaller]) + +[#147]: https://github.com/python-security/pyt/pull/147 +[#150]: https://github.com/python-security/pyt/pull/150 + +#### :bug: Bugfixes +* Fixed a bug where `get_call_names` only handled ast.Attribute nodes ([#148], thanks [@bcaller]) +* Fixed a bug where `vars_visitor.py` crashed on Python 3.5 dict syntax ([#144], thanks [@bcaller]) + +[#144]: https://github.com/python-security/pyt/pull/144 +[#148]: https://github.com/python-security/pyt/pull/148 + +#### :performing_arts: Performance + +* Added an `lru_cache` to the `generate_ast` function ([#153], thanks [@bcaller]) + +[#153]: https://github.com/python-security/pyt/pull/153 #### 
:mortar_board: Walkthrough / Help @@ -38,16 +70,22 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :snake: Miscellaneous +* Added tests for `vars_visitor.py`, making our overall coverage 91% ([#139], thanks [@stannum-l]) +* Cleaned and organized requirements, `setup.py`, `tox.ini` and `.travis.yml` ([#152], thanks [@bcaller]) +* Cleaned up the new pyt/core/ folder ([#132]) * Fixed all flake8 errors ([#114] & [#130], thanks [@cclauss]) * Re-organized the entire codebase into different directories ([#126]) -* Cleaned up the new pyt/core/ folder ([#132]) +* Return exit code 1 if any non-sanitised vulnerabilities are found ([#156], thanks [@bcaller]) -[#126]: https://github.com/python-security/pyt/pull/126 +[@cclauss]: https://github.com/cclauss +[@stannum-l]: https://github.com/stannum-l [#114]: https://github.com/python-security/pyt/pull/114 +[#126]: https://github.com/python-security/pyt/pull/126 [#130]: https://github.com/python-security/pyt/pull/130 -[@cclauss]: https://github.com/cclauss [#132]: https://github.com/python-security/pyt/pull/132 - +[#139]: https://github.com/python-security/pyt/pull/139 +[#152]: https://github.com/python-security/pyt/pull/152 +[#156]: https://github.com/python-security/pyt/pull/156 # 0.34 ##### April 24, 2018 @@ -56,8 +94,8 @@ If you love PyT, please star our project on GitHub to show your support! :star: * Baseline support ([#106], thanks [@omergunal]) -[#106]: https://github.com/python-security/pyt/pull/106 [@omergunal]: https://github.com/omergunal +[#106]: https://github.com/python-security/pyt/pull/106 #### :sparkles: Usability * Combined all source/sink information files and made it the default ([#116]) @@ -68,20 +106,20 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: #### :bug: Bugfixes * Fixed a bug where `visit_Raise` raised a `TypeError` ([#117], thanks [@lFatty]) -* Fixed an infinite loop bug that was caused while handling certain loops ([#118]) * Fixed a bug where we were not including `pyt/vulnerability_definitions` files ([#122], thanks [@Ekultek]) +* Fixed an infinite loop bug that was caused while handling certain loops ([#118]) #### :snake: Miscellaneous * Moved out a bunch of historical files to the [ReadTheDocs repo](https://github.com/KevinHock/rtdpyt) ([#110], [#111]) -[#116]: https://github.com/python-security/pyt/pull/116 +[@Ekultek]: https://github.com/Ekultek +[@lfatty]: https://github.com/lfatty +[#110]: https://github.com/python-security/pyt/pull/110 +[#111]: https://github.com/python-security/pyt/pull/111 [#115]: https://github.com/python-security/pyt/pull/115 +[#116]: https://github.com/python-security/pyt/pull/116 [#119]: https://github.com/python-security/pyt/pull/119 [#117]: https://github.com/python-security/pyt/pull/117 [#118]: https://github.com/python-security/pyt/pull/118 -[#111]: https://github.com/python-security/pyt/pull/111 -[#110]: https://github.com/python-security/pyt/pull/110 -[@lfatty]: https://github.com/lfatty [#122]: https://github.com/python-security/pyt/issues/122 -[@Ekultek]: https://github.com/Ekultek From dec57bdb13e0a03216ac8effa7406a7caf495b95 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 28 Jul 2018 14:50:18 -0700 Subject: [PATCH 214/291] [version] Bump to 0.35 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8d139dc0..59244cda 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.34' +VERSION = '0.35' setup( From 23b51da46c225ce72ff83a3966bc46322419d9df Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 11:06:53 +0100 Subject: [PATCH 215/291] Yield propagates taint of everything yielded not just the last thing yielded as before. 
It behaves like a cross between AugAssign (everything yielded is added to yld_* return variable) and Return (we make a yld_* return variable though don't immediately exit the function). --- .../generator_with_multiple_yields.py | 7 +++ examples/vulnerable_code/yield.py | 21 ++++++++ pyt/cfg/expr_visitor.py | 51 +++++++++++-------- pyt/core/README.rst | 2 +- pyt/core/node_types.py | 8 +++ pyt/vulnerabilities/vulnerability_helper.py | 9 ++++ tests/cfg/cfg_test.py | 36 +++++++++++++ tests/main_test.py | 4 +- tests/vulnerabilities/vulnerabilities_test.py | 25 +++++++++ 9 files changed, 139 insertions(+), 24 deletions(-) create mode 100644 examples/example_inputs/generator_with_multiple_yields.py create mode 100644 examples/vulnerable_code/yield.py diff --git a/examples/example_inputs/generator_with_multiple_yields.py b/examples/example_inputs/generator_with_multiple_yields.py new file mode 100644 index 00000000..46d78682 --- /dev/null +++ b/examples/example_inputs/generator_with_multiple_yields.py @@ -0,0 +1,7 @@ +def foo(): + a = 1 + if a == 1: + yield 0 + yield a + +foo() diff --git a/examples/vulnerable_code/yield.py b/examples/vulnerable_code/yield.py new file mode 100644 index 00000000..9c6a3e1b --- /dev/null +++ b/examples/vulnerable_code/yield.py @@ -0,0 +1,21 @@ +import subprocess +from flask import Flask, request + +app = Flask(__name__) + + +def things_to_run(): + yield "echo hello" + yield from request.get_json()["commands"] + yield "echo done" + + +@app.route('/', methods=['POST']) +def home(): + script = "; ".join(things_to_run()) + subprocess.call(script, shell=True) + return 'Executed' + + +if __name__ == '__main__': + app.run(debug=True) diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index f4d99d6a..a3c281f6 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -15,7 +15,8 @@ IgnoredNode, Node, RestoreNode, - ReturnNode + ReturnNode, + YieldNode ) from .expr_visitor_helper import ( BUILTINS, @@ -119,16 +120,20 @@ def 
visit_Yield(self, node): except AttributeError: rhs_visitor.result = 'EmptyYield' + # Yield is a bit like augmented assignment to a return value this_function_name = self.function_return_stack[-1] - LHS = 'yield_' + this_function_name - return self.append_node(ReturnNode( - LHS + ' = ' + label.result, + LHS = 'yld_' + this_function_name + return self.append_node(YieldNode( + LHS + ' += ' + label.result, LHS, node, - rhs_visitor.result, + rhs_visitor.result + [LHS], path=self.filenames[-1]) ) + def visit_YieldFrom(self, node): + return self.visit_Yield(node) + def visit_Attribute(self, node): return self.visit_miscelleaneous_node( node @@ -449,24 +454,28 @@ def return_handler( saved_function_call_index(int): Unique number for each call. first_node(EntryOrExitNode or RestoreNode): Used to connect previous statements to this function. """ - for node in function_nodes: + if any(isinstance(node, YieldNode) for node in function_nodes): + # Presence of a `YieldNode` means that the function is a generator + rhs_prefix = 'yld_' + elif any(isinstance(node, ConnectToExitNode) for node in function_nodes): # Only `Return`s and `Raise`s can be of type ConnectToExitNode - if isinstance(node, ConnectToExitNode): - # Create e.g. ~call_1 = ret_func_foo RestoreNode - LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index) - RHS = 'ret_' + get_call_names_as_string(call_node.func) - return_node = RestoreNode( - LHS + ' = ' + RHS, - LHS, - [RHS], - line_number=call_node.lineno, - path=self.filenames[-1] - ) - return_node.first_node = first_node + rhs_prefix = 'ret_' + else: + return # No return value - self.nodes[-1].connect(return_node) - self.nodes.append(return_node) - return + # Create e.g. 
~call_1 = ret_func_foo RestoreNode + LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index) + RHS = rhs_prefix + get_call_names_as_string(call_node.func) + return_node = RestoreNode( + LHS + ' = ' + RHS, + LHS, + [RHS], + line_number=call_node.lineno, + path=self.filenames[-1] + ) + return_node.first_node = first_node + self.nodes[-1].connect(return_node) + self.nodes.append(return_node) def process_function(self, call_node, definition): """Processes a user defined function when it is called. diff --git a/pyt/core/README.rst b/pyt/core/README.rst index becfa660..75ae7239 100644 --- a/pyt/core/README.rst +++ b/pyt/core/README.rst @@ -9,7 +9,7 @@ This directory contains miscellaneous code that is imported from different parts - `get_call_names`_ used in `vars_visitor.py`_ when visiting a Subscript, and `framework_helper.py`_ on function decorators in `is_flask_route_function`_ -- `get_call_names_as_string`_ used in `expr_visitor.py`_ to create ret_function_name as RHS and yield_function_name as LHS, and in stmt_visitor.py when connecting a function to a loop. +- `get_call_names_as_string`_ used in `expr_visitor.py`_ to create ret_function_name as RHS and yld_function_name as LHS, and in stmt_visitor.py when connecting a function to a loop. - `Arguments`_ used in `expr_visitor.py`_ when processing the arguments of a user defined function and `framework_adaptor.py`_ to taint function definition arguments. diff --git a/pyt/core/node_types.py b/pyt/core/node_types.py index 5981c5e8..28df3d77 100644 --- a/pyt/core/node_types.py +++ b/pyt/core/node_types.py @@ -285,3 +285,11 @@ def __init__( line_number=ast_node.lineno, path=path ) + + +class YieldNode(AssignmentNode): + """CFG Node that represents a yield or yield from. + + The presence of a YieldNode means that a function is a generator. 
+ """ + pass diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index 80a37491..8c3b84a4 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -4,6 +4,8 @@ from enum import Enum from collections import namedtuple +from ..core.node_types import YieldNode + class VulnerabilityType(Enum): FALSE = 0 @@ -56,6 +58,7 @@ def __init__( self.reassignment_nodes = reassignment_nodes self._remove_sink_from_secondary_nodes() + self._remove_non_propagating_yields() def _remove_sink_from_secondary_nodes(self): try: @@ -63,6 +66,12 @@ def _remove_sink_from_secondary_nodes(self): except ValueError: # pragma: no cover pass + def _remove_non_propagating_yields(self): + """Remove yield with no variables e.g. `yield 123` and plain `yield` from vulnerability.""" + for node in list(self.reassignment_nodes): + if isinstance(node, YieldNode) and len(node.right_hand_side_variables) == 1: + self.reassignment_nodes.remove(node) + def __str__(self): """Pretty printing of a vulnerability.""" reassigned_str = _get_reassignment_str(self.reassignment_nodes) diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index 9bea8514..64ee5bf2 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -995,6 +995,42 @@ def test_function_multiple_return(self): (call_foo, exit_foo), (_exit, call_foo)]) + def test_generator_multiple_yield(self): + path = 'examples/example_inputs/generator_with_multiple_yields.py' + self.cfg_create_from_file(path) + + self.assert_length(self.cfg.nodes, expected_length=9) + + entry = 0 + entry_foo = 1 + a = 2 + _if = 3 + yld_if = 4 + yld = 5 + exit_foo = 6 + call_foo = 7 + _exit = 8 + + self.assertInCfg([ + (entry_foo, entry), + (a, entry_foo), + (_if, a), + (yld_if, _if), + (yld, _if), + (yld, yld_if), # Different from return + (exit_foo, yld), + (call_foo, exit_foo), + (_exit, call_foo) + ]) + + yld_if_node = self.cfg.nodes[yld_if] + 
self.assertEqual(yld_if_node.left_hand_side, 'yld_foo') + self.assertEqual(yld_if_node.right_hand_side_variables, ['yld_foo']) + yld_node = self.cfg.nodes[yld] + self.assertEqual(yld_node.left_hand_side, 'yld_foo') + self.assertEqual(yld_node.right_hand_side_variables, ['a', 'yld_foo']) + self.assertEqual(self.cfg.nodes[call_foo].right_hand_side_variables, ['yld_foo']) + def test_blackbox_call_after_if(self): path = 'examples/vulnerable_code/blackbox_call_after_if.py' self.cfg_create_from_file(path) diff --git a/tests/main_test.py b/tests/main_test.py index b01cc8ee..b3b94b6d 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -99,11 +99,11 @@ def test_targets_with_recursive(self): excluded_files = "" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 30) + self.assertEqual(len(included_files), 31) def test_targets_with_recursive_and_excluded(self): targets = ["examples/vulnerable_code/"] excluded_files = "inter_command_injection.py" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 29) + self.assertEqual(len(included_files), 30) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 20d142ba..04267140 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -492,6 +492,31 @@ def test_XSS_variable_multiple_assign_result(self): self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) + def test_yield(self): + vulnerabilities = self.run_analysis('examples/vulnerable_code/yield.py') + self.assert_length(vulnerabilities, expected_length=1) + vuln = vulnerabilities[0] + self.assertEqual(vuln.source.left_hand_side, "yld_things_to_run") + self.assertIn("yld_things_to_run", vuln.source.right_hand_side_variables) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: examples/vulnerable_code/yield.py + > User input at line 9, 
source "request.get_json(": + yld_things_to_run += request.get_json()['commands'] + Reassigned in: + File: examples/vulnerable_code/yield.py + > Line 15: ~call_2 = yld_things_to_run + File: examples/vulnerable_code/yield.py + > Line 15: ~call_1 = ret_'; '.join(~call_2) + File: examples/vulnerable_code/yield.py + > Line 15: script = ~call_1 + File: examples/vulnerable_code/yield.py + > reaches line 16, sink "subprocess.call(": + ~call_3 = ret_subprocess.call(script, shell=True) + This vulnerability is unknown due to: Label: ~call_1 = ret_'; '.join(~call_2) + """ + + self.assertAlphaEqual(str(vuln), EXPECTED_VULNERABILITY_DESCRIPTION) + class EngineDjangoTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): From 25eb536b32417378f31206cc75915bae0b11cc05 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 11:39:20 +0100 Subject: [PATCH 216/291] Remove unnecessary and incorrect type EmptyYield It was a str when it should've been a List[str] --- pyt/cfg/expr_visitor.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index a3c281f6..6623d717 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -114,11 +114,10 @@ def visit_Yield(self, node): label = LabelVisitor() label.visit(node) - try: - rhs_visitor = RHSVisitor() - rhs_visitor.visit(node.value) - except AttributeError: - rhs_visitor.result = 'EmptyYield' + if node.value is None: + rhs_visitor_result = [] + else: + rhs_visitor_result = RHSVisitor.result_for_node(node.value) # Yield is a bit like augmented assignment to a return value this_function_name = self.function_return_stack[-1] @@ -127,7 +126,7 @@ def visit_Yield(self, node): LHS + ' += ' + label.result, LHS, node, - rhs_visitor.result + [LHS], + rhs_visitor_result + [LHS], path=self.filenames[-1]) ) From ff0e04273a263b9a497429fec6d3b72532cdf4f3 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 12:13:10 +0100 Subject: [PATCH 217/291] 
Remove unnecessary and incorrect type EmptyReturn It was a str instead of List[str] --- pyt/cfg/stmt_visitor.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index faccb94b..e64ee163 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -244,12 +244,6 @@ def visit_Return(self, node): label = LabelVisitor() label.visit(node) - try: - rhs_visitor = RHSVisitor() - rhs_visitor.visit(node.value) - except AttributeError: - rhs_visitor.result = 'EmptyReturn' - this_function_name = self.function_return_stack[-1] LHS = 'ret_' + this_function_name @@ -263,14 +257,17 @@ def visit_Return(self, node): path=self.filenames[-1] ) return_value_of_call.connect(return_node) - self.nodes.append(return_node) - return return_node + return self.append_node(return_node) + elif node.value is not None: + rhs_visitor_result = RHSVisitor.result_for_node(node.value) + else: + rhs_visitor_result = [] return self.append_node(ReturnNode( LHS + ' = ' + label.result, LHS, node, - rhs_visitor.result, + rhs_visitor_result, path=self.filenames[-1] )) From 04b29c65cfdd2592463b4ea641c6b584d7d1ee65 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 27 Jul 2018 12:54:37 +0100 Subject: [PATCH 218/291] AugAssign propagates taint Before, the variable would be tainted only if the last += was tainted. Now url = 'http://' url += TAINT url += '?x=y' url marked as tainted. 
--- pyt/cfg/stmt_visitor.py | 5 +++-- tests/cfg/cfg_test.py | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index e64ee163..4acfea20 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -499,11 +499,12 @@ def visit_AugAssign(self, node): rhs_visitor = RHSVisitor() rhs_visitor.visit(node.value) + lhs = extract_left_hand_side(node.target) return self.append_node(AssignmentNode( label.result, - extract_left_hand_side(node.target), + lhs, node, - rhs_visitor.result, + rhs_visitor.result + [lhs], path=self.filenames[-1] )) diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index 64ee5bf2..1d317fc5 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -820,6 +820,14 @@ def test_assignment_starred_list(self): [('a', ['d']), ('b', ['d']), ('c', ['e'])], ) + def test_augmented_assignment(self): + self.cfg_create_from_ast(ast.parse('a+=f(b,c)')) + + (node,) = self.cfg.nodes[1:-1] + self.assertEqual(node.label, 'a += f(b, c)') + self.assertEqual(node.left_hand_side, 'a') + self.assertEqual(node.right_hand_side_variables, ['b', 'c', 'a']) + class CFGComprehensionTest(CFGBaseTestCase): def test_nodes(self): From 97f99a061f2d65405dadc04f9ab4942d9d786d44 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 30 Jul 2018 18:25:48 -0700 Subject: [PATCH 219/291] [version] Bump to 0.36 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 59244cda..a03dd6e5 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.35' +VERSION = '0.36' setup( From 76be3218525b7b24b567a709a5ebd492c0ab8585 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 30 Jul 2018 19:27:02 -0700 Subject: [PATCH 220/291] Add __init_ files to pyt/analysis and pyt/core --- pyt/analysis/__init__.py | 0 pyt/core/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pyt/analysis/__init__.py 
create mode 100644 pyt/core/__init__.py diff --git a/pyt/analysis/__init__.py b/pyt/analysis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyt/core/__init__.py b/pyt/core/__init__.py new file mode 100644 index 00000000..e69de29b From bc3200ccb45b5af4f79005c75677eb8ae23bf532 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 30 Jul 2018 19:27:41 -0700 Subject: [PATCH 221/291] [version] Bump to 0.37 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a03dd6e5..edf41be0 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.36' +VERSION = '0.37' setup( From 5b7d06b0ec5840b69f41d3678930fc6a2675d311 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Mon, 30 Jul 2018 19:47:40 -0700 Subject: [PATCH 222/291] Changed Unreleased to 0.37 And added #155 notes --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f15172c..6b487727 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,8 @@ If you love PyT, please star our project on GitHub to show your support! :star: [@xxxx]: https://github.com/xxxx --> -# Unreleased +# 0.37 +##### July 30, 2018 #### :tada: New Features @@ -47,9 +48,11 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: * Added per-arg taint, for sink functions ([#147], thanks [@bcaller]) * Improved tuple assingment to be more precise and support starargs ([#150], thanks [@bcaller]) +* AugAssign, Yield, and YieldFrom taint propagation improvements ([#155], thanks [@bcaller]) [#147]: https://github.com/python-security/pyt/pull/147 [#150]: https://github.com/python-security/pyt/pull/150 +[#155]: https://github.com/python-security/pyt/pull/155 #### :bug: Bugfixes * Fixed a bug where `get_call_names` only handled ast.Attribute nodes ([#148], thanks [@bcaller]) From de30591cf9ffcae71de3725fdd1b9ee529b53c64 Mon Sep 17 00:00:00 2001 From: bcaller Date: Tue, 31 Jul 2018 10:20:28 +0100 Subject: [PATCH 223/291] Transform all async ast nodes into sync nodes so that they can be handled identically. Sync and async nodes propagate taint in exactly the same way. Awaits are more or less removed: `return await x()` is converted into `return x()`. Any AsyncFunctionDef is converted to a FunctionDef. Same for AsyncFor and AsyncWith. Using a transformer makes it easier to replace awaits everywhere. 
We have lots of places where we do `isinstance(node, ast.Call)` but for these we want it to function like `isinstance(node, ast.Call) or (isinstance(node, ast.Await) and isinstance(node.value, ast.Call))` --- examples/example_inputs/asynchronous.py | 10 +++++++ pyt/core/ast_helper.py | 5 +++- pyt/core/transformer.py | 18 +++++++++++++ pyt/helper_visitors/vars_visitor.py | 3 --- tests/cfg/cfg_test.py | 22 +++++++++++++++ tests/core/transformer_test.py | 36 +++++++++++++++++++++++++ 6 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 examples/example_inputs/asynchronous.py create mode 100644 pyt/core/transformer.py create mode 100644 tests/core/transformer_test.py diff --git a/examples/example_inputs/asynchronous.py b/examples/example_inputs/asynchronous.py new file mode 100644 index 00000000..dff4fdc9 --- /dev/null +++ b/examples/example_inputs/asynchronous.py @@ -0,0 +1,10 @@ +async def g(x, *args): + return await x() + + +async def f(y): + z = await g(y, await v) + return z + + +f(w) diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index 3d17b7f0..9a16267c 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -6,6 +6,8 @@ import subprocess from functools import lru_cache +from .transformer import AsyncTransformer + BLACK_LISTED_CALL_NAMES = ['self'] recursive = False @@ -32,7 +34,8 @@ def generate_ast(path): if os.path.isfile(path): with open(path, 'r') as f: try: - return ast.parse(f.read()) + tree = ast.parse(f.read()) + return AsyncTransformer().visit(tree) except SyntaxError: # pragma: no cover global recursive if not recursive: diff --git a/pyt/core/transformer.py b/pyt/core/transformer.py new file mode 100644 index 00000000..12051c89 --- /dev/null +++ b/pyt/core/transformer.py @@ -0,0 +1,18 @@ +import ast + + +class AsyncTransformer(ast.NodeTransformer): + """Converts all async nodes into their synchronous counterparts.""" + + def visit_Await(self, node): + """Awaits are treated as if the keyword was absent.""" + 
return self.visit(node.value) + + def visit_AsyncFunctionDef(self, node): + return self.visit(ast.FunctionDef(**node.__dict__)) + + def visit_AsyncFor(self, node): + return self.visit(ast.For(**node.__dict__)) + + def visit_AsyncWith(self, node): + return self.visit(ast.With(**node.__dict__)) diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index 272744d8..9708f86f 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -68,9 +68,6 @@ def visit_GeneratorComp(self, node): for gen in node.generators: self.comprehension(gen) - def visit_Await(self, node): - self.visit(node.value) - def visit_Yield(self, node): if node.value: self.visit(node.value) diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index 1d317fc5..cc2df076 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -1446,6 +1446,28 @@ def test_name_constant_if(self): self.assertEqual(expected_label, actual_label) +class CFGAsync(CFGBaseTestCase): + def test_await_keyword_treated_as_if_absent(self): + self.cfg_create_from_file('examples/example_inputs/asynchronous.py') + enter_g = 8 + call_x = 9 + ret_g = 10 + exit_g = 11 + call_ret_val = 12 + set_z_to_g_ret_val = 13 + + for i in range(enter_g, set_z_to_g_ret_val + 1): + self.assertIn(self.cfg.nodes[i], self.cfg.nodes[i + 1].ingoing) + self.assertIn(self.cfg.nodes[i + 1], self.cfg.nodes[i].outgoing) + + self.assertIsInstance(self.cfg.nodes[enter_g], EntryOrExitNode) + self.assertEqual(self.cfg.nodes[call_x].label, '~call_3 = ret_x()') + self.assertEqual(self.cfg.nodes[ret_g].label, 'ret_g = ~call_3') + self.assertIsInstance(self.cfg.nodes[exit_g], EntryOrExitNode) + self.assertEqual(self.cfg.nodes[call_ret_val].label, '~call_2 = ret_g') + self.assertEqual(self.cfg.nodes[set_z_to_g_ret_val].label, 'z = ~call_2') + + class CFGName(CFGBaseTestCase): """Test is Name nodes are properly handled in different contexts""" diff --git a/tests/core/transformer_test.py 
b/tests/core/transformer_test.py new file mode 100644 index 00000000..8233287b --- /dev/null +++ b/tests/core/transformer_test.py @@ -0,0 +1,36 @@ +import ast +import unittest + +from pyt.core.transformer import AsyncTransformer + + +class TransformerTest(unittest.TestCase): + """Tests for the AsyncTransformer.""" + + def test_async_removed_by_transformer(self): + async_tree = ast.parse("\n".join([ + "async def a():", + " async for b in c():", + " await b()", + " async with d() as e:", + " pass", + " return await y()" + ])) + self.assertIsInstance(async_tree.body[0], ast.AsyncFunctionDef) + self.assertIsInstance(async_tree.body[0].body[-1], ast.Return) + self.assertIsInstance(async_tree.body[0].body[-1].value, ast.Await) + + sync_tree = ast.parse("\n".join([ + "def a():", + " for b in c():", + " b()", + " with d() as e:", + " pass", + " return y()" + ])) + self.assertIsInstance(sync_tree.body[0], ast.FunctionDef) + + transformed = AsyncTransformer().visit(async_tree) + self.assertIsInstance(transformed.body[0], ast.FunctionDef) + + self.assertEqual(ast.dump(transformed), ast.dump(sync_tree)) From d626efc6c45dc9cfdee4737dd4957f1047f1cf6a Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 31 Jul 2018 12:59:33 -0700 Subject: [PATCH 224/291] Remove references to dependencies we no longer have --- README.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/README.rst b/README.rst index 0870edc0..f536217e 100644 --- a/README.rst +++ b/README.rst @@ -149,18 +149,4 @@ Change to project directory ``cd pyt`` -Install dependencies - -``pip install -r requirements.txt`` - -``pip list`` sample output :: - - gitdb (0.6.4) - GitPython (2.0.8) - graphviz (0.4.10) - pip (9.0.1) - requests (2.10.0) - setuptools (28.8.0) - smmap (0.9.0) - In the future, just type ``source ~/a_folder/bin/activate`` to start developing. 
From b1556a1de8c0129126cd631b8f7b86fc99588a4b Mon Sep 17 00:00:00 2001 From: KevinHock Date: Tue, 31 Jul 2018 16:56:11 -0700 Subject: [PATCH 225/291] Added more backticks --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b487727..15a60fae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,7 +55,7 @@ If you love PyT, please star our project on GitHub to show your support! :star: [#155]: https://github.com/python-security/pyt/pull/155 #### :bug: Bugfixes -* Fixed a bug where `get_call_names` only handled ast.Attribute nodes ([#148], thanks [@bcaller]) +* Fixed a bug where `get_call_names` only handled `ast.Attribute` nodes ([#148], thanks [@bcaller]) * Fixed a bug where `vars_visitor.py` crashed on Python 3.5 dict syntax ([#144], thanks [@bcaller]) [#144]: https://github.com/python-security/pyt/pull/144 @@ -69,13 +69,13 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :mortar_board: Walkthrough / Help -* Added README.rst files to almost every directory. (Partially [#126]) +* Added `README.rst` files to almost every directory. (Partially [#126]) #### :snake: Miscellaneous * Added tests for `vars_visitor.py`, making our overall coverage 91% ([#139], thanks [@stannum-l]) * Cleaned and organized requirements, `setup.py`, `tox.ini` and `.travis.yml` ([#152], thanks [@bcaller]) -* Cleaned up the new pyt/core/ folder ([#132]) +* Cleaned up the new `pyt/core/` folder ([#132]) * Fixed all flake8 errors ([#114] & [#130], thanks [@cclauss]) * Re-organized the entire codebase into different directories ([#126]) * Return exit code 1 if any non-sanitised vulnerabilities are found ([#156], thanks [@bcaller]) From a3081c098f34b0748f742bb60af083da335da15d Mon Sep 17 00:00:00 2001 From: "Daniel M. 
Capella" Date: Wed, 1 Aug 2018 14:02:10 -0400 Subject: [PATCH 226/291] mock is included in unittest since Python 3.3 https://docs.python.org/3/library/unittest.mock.html "Pyt runs on python 3.6+": https://github.com/python-security/pyt/commit/609acd6ca7e3800f21e82d5e3736e8112a91b49c#r29924039 --- tests/main_test.py | 2 +- tox.ini | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index b3b94b6d..1037d843 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,4 +1,4 @@ -import mock +from unittest import mock from .base_test_case import BaseTestCase from pyt.__main__ import discover_files, main diff --git a/tox.ini b/tox.ini index 9ea660f5..c9b4b248 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,6 @@ envlist = py36,cover,lint [testenv] -deps = mock commands = python -m tests @@ -10,7 +9,6 @@ commands = whitelist_externals = coverage deps = coverage>=4.0,<4.4 - mock commands = coverage erase coverage run tests From 0038b1e2ea92b7e38bd7f94e52380d7514830aa8 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 2 Aug 2018 18:42:04 -0700 Subject: [PATCH 227/291] Only write to blackbox mapping if in interactive mode --- pyt/vulnerabilities/vulnerabilities.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 7fd14cd7..f169c4b2 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -540,6 +540,9 @@ def find_vulnerabilities( vulnerabilities, nosec_lines ) - with open(blackbox_mapping_file, 'w') as outfile: - json.dump(blackbox_mapping, outfile, indent=4) + + if ui_mode == UImode.INTERACTIVE: + with open(blackbox_mapping_file, 'w') as outfile: + json.dump(blackbox_mapping, outfile, indent=4) + return vulnerabilities From a58c7482c6f6818c79e5a911a03cc774bce54daa Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 2 Aug 2018 18:43:27 -0700 Subject: [PATCH 228/291] 
[version] Bump to 0.38 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index edf41be0..c6b082f4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.37' +VERSION = '0.38' setup( From 3d722eb54e5283f3ad1ee7693c83d3ad22932de5 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:01:57 -0700 Subject: [PATCH 229/291] Added 0.38 Notes --- CHANGELOG.md | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15a60fae..19222a31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,8 +23,30 @@ If you love PyT, please star our project on GitHub to show your support! :star: [@xxxx]: https://github.com/xxxx --> +# 0.38 +##### August 2nd, 2018 + +#### :tada: New Features + +* Support for all async node types ([#158], thanks [@bcaller]) + +[#158]: https://github.com/python-security/pyt/pull/158 + +#### :bug: Bugfixes +* Fixed a bug where we [wrote to the default_blackbox_mapping file](https://github.com/python-security/pyt/commit/0038b1e2ea92b7e38bd7f94e52380d7514830aa8) even if the `-i` option was not being used. ([#161], thanks [@polyzen]) + +[@polyzen]: https://github.com/polyzen +[#161]: https://github.com/python-security/pyt/issues/161 + +#### :snake: Miscellaneous + +* Switched to using built-in mock ([#160], thanks [@polyzen]) + +[#160]: https://github.com/python-security/pyt/pull/160 + + # 0.37 -##### July 30, 2018 +##### July 30th, 2018 #### :tada: New Features @@ -91,7 +113,7 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: [#156]: https://github.com/python-security/pyt/pull/156 # 0.34 -##### April 24, 2018 +##### April 24th, 2018 #### :tada: New Features From 3126aae3f2d790918374356db568a4d25764fc93 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:19:38 -0700 Subject: [PATCH 230/291] Update README.rst --- pyt/web_frameworks/README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyt/web_frameworks/README.rst b/pyt/web_frameworks/README.rst index f7292756..e410e5ff 100644 --- a/pyt/web_frameworks/README.rst +++ b/pyt/web_frameworks/README.rst @@ -1,3 +1,6 @@ +Web Frameworks +============== + This code determines which functions have their arguments marked at tainted, for example by default the framework adaptor is Flask, so .. code-block:: python From b6129cf5e7676ded5802f4a93a15614675e24c21 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:33:54 -0700 Subject: [PATCH 231/291] Separated the important options from the rest This readme is shitty, I need to improve it --- README.rst | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index f536217e..5c6f767f 100644 --- a/README.rst +++ b/README.rst @@ -72,10 +72,18 @@ Usage required arguments: targets source file(s) or directory(s) to be tested - optional arguments: + important optional arguments: -a ADAPTOR, --adaptor ADAPTOR Choose a web framework adaptor: Flask(Default), Django, Every or Pylons + + -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE + Input file with a list of sources and sinks + + -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE + Input blackbox mapping file + + optional arguments: -pr PROJECT_ROOT, --project-root PROJECT_ROOT Add project root, only important when the entry file is not at the root of the project. @@ -83,10 +91,6 @@ Usage Path of a baseline report to compare against (only JSON-formatted files are accepted) -j, --json Prints JSON instead of report. 
- -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE - Input blackbox mapping file. - -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE - Input file with a list of sources and sinks -o OUTPUT_FILE, --output OUTPUT_FILE write report to filename --ignore-nosec do not skip lines with # nosec comments @@ -94,6 +98,7 @@ Usage -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS Separate files with commas + print arguments: -trim, --trim-reassigned-in Trims the reassigned list to just the vulnerability @@ -101,6 +106,25 @@ Usage -i, --interactive Will ask you about each blackbox function call in vulnerability chains. +Choosing a Web Framework +======================== + +`The -a option chooses what functions will have their arguments tainted`_ + +.. _The -a option chooses what functions will have their arguments tainted: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks#web-frameworks + +Configuring Source and Sink Information +======================================= + +Use the ``-t`` option to specify sources and sinks, by default `this file is used`_. + +.. _this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/all_trigger_words.pyt + +For functions that are imported from libraries, use the ``-m`` option to specify whether or not they +return tainted values given tainted inputs, by `default this file is used`_. + +.. 
_default this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/blackbox_mapping.json) + Usage from Source ================= From 8e5e0a9f4b8565979dabcb44ee37d516cb70b0a9 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:43:29 -0700 Subject: [PATCH 232/291] Added `How To Use` section --- README.rst | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/README.rst b/README.rst index 5c6f767f..98534834 100644 --- a/README.rst +++ b/README.rst @@ -54,10 +54,26 @@ PyT can also be installed from source. To do so, clone the repo, and then run: How It Works ============ -Soon you will find a README.rst in every directory in the pyt folder, `start here`_. +Soon you will find a `README.rst`_ in every directory in the ``pyt/`` folder, `start here`_. +.. _README.rst: https://github.com/python-security/pyt/tree/master/pyt .. _start here: https://github.com/python-security/pyt/tree/master/pyt + +How To Use +============ + +1. Choose a web framework: `The -a option chooses what functions will have their arguments tainted`_, by default it is Flask. + +2. (optional) Customize source and sink information: Use the ``-t`` option to specify sources and sinks, by default `this file is used`_. + +3. (optional) Customize which library functions propagate taint: For functions that are imported from libraries, e.g. ``url_for`` or ``os.path.join``, use the ``-m`` option to specify whether or not they return tainted values given tainted inputs, by `default this file is used`_. + +.. _The -a option chooses what functions will have their arguments tainted: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks#web-frameworks +.. _this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/all_trigger_words.pyt +.. 
_default this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/blackbox_mapping.json + + Usage ===== @@ -106,25 +122,6 @@ Usage -i, --interactive Will ask you about each blackbox function call in vulnerability chains. -Choosing a Web Framework -======================== - -`The -a option chooses what functions will have their arguments tainted`_ - -.. _The -a option chooses what functions will have their arguments tainted: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks#web-frameworks - -Configuring Source and Sink Information -======================================= - -Use the ``-t`` option to specify sources and sinks, by default `this file is used`_. - -.. _this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/all_trigger_words.pyt - -For functions that are imported from libraries, use the ``-m`` option to specify whether or not they -return tainted values given tainted inputs, by `default this file is used`_. - -.. _default this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/blackbox_mapping.json) - Usage from Source ================= From 572257179e86931ecb8a4ce5d77474391ae5c449 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:43:49 -0700 Subject: [PATCH 233/291] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 98534834..2e17f70d 100644 --- a/README.rst +++ b/README.rst @@ -51,7 +51,7 @@ PyT can also be installed from source. To do so, clone the repo, and then run: python3 setup.py install -How It Works +How it Works ============ Soon you will find a `README.rst`_ in every directory in the ``pyt/`` folder, `start here`_. @@ -60,7 +60,7 @@ Soon you will find a `README.rst`_ in every directory in the ``pyt/`` folder, `s .. 
_start here: https://github.com/python-security/pyt/tree/master/pyt -How To Use +How to Use ============ 1. Choose a web framework: `The -a option chooses what functions will have their arguments tainted`_, by default it is Flask. From 4332e005399bbf606420d4a199f458cf8ad1e3b8 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:44:31 -0700 Subject: [PATCH 234/291] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2e17f70d..74a0bad9 100644 --- a/README.rst +++ b/README.rst @@ -63,13 +63,13 @@ Soon you will find a `README.rst`_ in every directory in the ``pyt/`` folder, `s How to Use ============ -1. Choose a web framework: `The -a option chooses what functions will have their arguments tainted`_, by default it is Flask. +1. Choose a web framework: `The -a option determines which functions will have their arguments tainted`_, by default it is Flask. 2. (optional) Customize source and sink information: Use the ``-t`` option to specify sources and sinks, by default `this file is used`_. 3. (optional) Customize which library functions propagate taint: For functions that are imported from libraries, e.g. ``url_for`` or ``os.path.join``, use the ``-m`` option to specify whether or not they return tainted values given tainted inputs, by `default this file is used`_. -.. _The -a option chooses what functions will have their arguments tainted: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks#web-frameworks +.. _The -a option determines which functions will have their arguments tainted: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks#web-frameworks .. _this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/all_trigger_words.pyt .. 
_default this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/blackbox_mapping.json From 7d35be38c856e1b7b417afbd494eacbd9f0d8752 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:45:09 -0700 Subject: [PATCH 235/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 74a0bad9..c4c60b63 100644 --- a/README.rst +++ b/README.rst @@ -119,7 +119,7 @@ Usage -trim, --trim-reassigned-in Trims the reassigned list to just the vulnerability chain. - -i, --interactive Will ask you about each blackbox function call in + -i, --interactive Will ask you about each blackbox function call in vulnerability chains. Usage from Source From e4e0805efc15611077be8f8f0aad6e3c569fa730 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:46:18 -0700 Subject: [PATCH 236/291] Update README.rst --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index c4c60b63..7ed85f0b 100644 --- a/README.rst +++ b/README.rst @@ -108,9 +108,9 @@ Usage JSON-formatted files are accepted) -j, --json Prints JSON instead of report. 
-o OUTPUT_FILE, --output OUTPUT_FILE - write report to filename - --ignore-nosec do not skip lines with # nosec comments - -r, --recursive find and process files in subdirectories + Write report to filename + --ignore-nosec Do not skip lines with # nosec comments + -r, --recursive Find and process files in subdirectories -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS Separate files with commas From 92fa3b71ec39d0682be3d6f0e54c729c8906cee6 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:47:41 -0700 Subject: [PATCH 237/291] Update README.rst --- README.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 7ed85f0b..4f413d00 100644 --- a/README.rst +++ b/README.rst @@ -102,15 +102,21 @@ Usage optional arguments: -pr PROJECT_ROOT, --project-root PROJECT_ROOT Add project root, only important when the entry file - is not at the root of the project. + is not at the root of the project + -b BASELINE_JSON_FILE, --baseline BASELINE_JSON_FILE Path of a baseline report to compare against (only JSON-formatted files are accepted) - -j, --json Prints JSON instead of report. + + -j, --json Prints JSON instead of report + -o OUTPUT_FILE, --output OUTPUT_FILE Write report to filename + --ignore-nosec Do not skip lines with # nosec comments + -r, --recursive Find and process files in subdirectories + -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS Separate files with commas From 70257a719c3e496c6f5e3305ddb106d76e3e34c5 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 2 Aug 2018 19:49:51 -0700 Subject: [PATCH 238/291] Update README.rst --- README.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 4f413d00..5c44f15a 100644 --- a/README.rst +++ b/README.rst @@ -63,11 +63,17 @@ Soon you will find a `README.rst`_ in every directory in the ``pyt/`` folder, `s How to Use ============ -1. 
Choose a web framework: `The -a option determines which functions will have their arguments tainted`_, by default it is Flask. +1. Choose a web framework -2. (optional) Customize source and sink information: Use the ``-t`` option to specify sources and sinks, by default `this file is used`_. +`The -a option determines which functions will have their arguments tainted`_, by default it is Flask. -3. (optional) Customize which library functions propagate taint: For functions that are imported from libraries, e.g. ``url_for`` or ``os.path.join``, use the ``-m`` option to specify whether or not they return tainted values given tainted inputs, by `default this file is used`_. +2. (optional) Customize source and sink information + +Use the ``-t`` option to specify sources and sinks, by default `this file is used`_. + +3. (optional) Customize which library functions propagate taint + +For functions from builtins or libraries, e.g. ``url_for`` or ``os.path.join``, use the ``-m`` option to specify whether or not they return tainted values given tainted inputs, by `default this file is used`_. .. _The -a option determines which functions will have their arguments tainted: https://github.com/python-security/pyt/tree/master/pyt/web_frameworks#web-frameworks .. _this file is used: https://github.com/python-security/pyt/blob/master/pyt/vulnerability_definitions/all_trigger_words.pyt From 5b50922e5bd5d95ec3e43b78aed3e5ae8001c6e6 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 3 Aug 2018 12:07:12 +0100 Subject: [PATCH 239/291] Iterate over args and kwargs with care ast.Call.args is [argument node] ast.Call.keywords however is [ast.keyword] You need to do isinstance(keyword_node.value, X) otherwise handling of args and kwargs will be different. 
--- pyt/cfg/stmt_visitor.py | 12 ++++++++---- pyt/helper_visitors/vars_visitor.py | 3 ++- tests/helper_visitors/vars_visitor_test.py | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 4acfea20..94c52ee0 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -615,7 +615,8 @@ def add_blackbox_or_builtin_call(self, node, blackbox): rhs_vars = list() last_return_value_of_nested_call = None - for arg in itertools.chain(node.args, node.keywords): + for arg_node in itertools.chain(node.args, node.keywords): + arg = arg_node.value if isinstance(arg_node, ast.keyword) else arg_node if isinstance(arg, ast.Call): return_value_of_nested_call = self.visit(arg) @@ -634,15 +635,18 @@ def add_blackbox_or_builtin_call(self, node, blackbox): call_node.inner_most_call = return_value_of_nested_call last_return_value_of_nested_call = return_value_of_nested_call - visual_args.append(return_value_of_nested_call.left_hand_side) + if isinstance(arg_node, ast.keyword) and arg_node.arg is not None: + visual_args.append(arg_node.arg + '=' + return_value_of_nested_call.left_hand_side) + else: + visual_args.append(return_value_of_nested_call.left_hand_side) rhs_vars.append(return_value_of_nested_call.left_hand_side) else: label = LabelVisitor() - label.visit(arg) + label.visit(arg_node) visual_args.append(label.result) vv = VarsVisitor() - vv.visit(arg) + vv.visit(arg_node) rhs_vars.extend(vv.result) if last_return_value_of_nested_call: # connect other_inner to outer in e.g. 
diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index 9708f86f..97a09053 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -84,7 +84,8 @@ def visit_Call(self, node): # This will not visit Flask in Flask(__name__) but it will visit request in `request.args.get() if not isinstance(node.func, ast.Name): self.visit(node.func) - for arg in itertools.chain(node.args, node.keywords): + for arg_node in itertools.chain(node.args, node.keywords): + arg = arg_node.value if isinstance(arg_node, ast.keyword) else arg_node if isinstance(arg, ast.Call): if isinstance(arg.func, ast.Name): # We can't just visit because we need to add 'ret_' diff --git a/tests/helper_visitors/vars_visitor_test.py b/tests/helper_visitors/vars_visitor_test.py index f248b6c4..f45e81c5 100644 --- a/tests/helper_visitors/vars_visitor_test.py +++ b/tests/helper_visitors/vars_visitor_test.py @@ -45,6 +45,10 @@ def test_call5(self): self.assertEqual(vars.result, ['resp', 'ret_replace']) def test_call6(self): + vars = self.perform_vars_on_expression("resp = f(kw=g(a, b))") + self.assertEqual(vars.result, ['resp', 'ret_g']) + + def test_call7(self): vars = self.perform_vars_on_expression("resp = make_response(html.replace.bar('{{ param }}', param))") self.assertEqual(vars.result, ['resp', 'ret_bar']) From f2586395329731819265e9cb5df5592138f0ebe9 Mon Sep 17 00:00:00 2001 From: bcaller Date: Tue, 31 Jul 2018 13:05:50 +0100 Subject: [PATCH 240/291] Don't crash on pathological case of f(g(a)(b)(c)) It's rare, but a curried function call can appear within a function call. It was raising a RuntimeError in VarsVisitor. We don't build a cfg properly for curried functions which is fine for now, but we don't need to crash. At least there is now defined behaviour. 
--- pyt/helper_visitors/vars_visitor.py | 24 ++++++++++++++++++++-- tests/helper_visitors/vars_visitor_test.py | 7 +++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/pyt/helper_visitors/vars_visitor.py b/pyt/helper_visitors/vars_visitor.py index 97a09053..8caa2571 100644 --- a/pyt/helper_visitors/vars_visitor.py +++ b/pyt/helper_visitors/vars_visitor.py @@ -96,12 +96,32 @@ def visit_Call(self, node): # func.value.id is html # We want replace self.result.append('ret_' + arg.func.attr) + elif isinstance(arg.func, ast.Call): + self.visit_curried_call_inside_call_args(arg) else: - # Deal with it when we have code that triggers it. - raise + raise Exception('Cannot visit vars of ' + ast.dump(arg)) else: self.visit(arg) + def visit_curried_call_inside_call_args(self, inner_call): + # Curried functions aren't supported really, but we now at least have a defined behaviour. + # In f(g(a)(b)(c)), inner_call is the Call node with argument c + # Try to get the name of curried function g + curried_func = inner_call.func.func + while isinstance(curried_func, ast.Call): + curried_func = curried_func.func + if isinstance(curried_func, ast.Name): + self.result.append('ret_' + curried_func.id) + elif isinstance(curried_func, ast.Attribute): + self.result.append('ret_' + curried_func.attr) + + # Visit all arguments except a (ignore the curried function g) + not_curried = inner_call + while not_curried.func is not curried_func: + for arg in itertools.chain(not_curried.args, not_curried.keywords): + self.visit(arg.value if isinstance(arg, ast.keyword) else arg) + not_curried = not_curried.func + def visit_Attribute(self, node): if not isinstance(node.value, ast.Name): self.visit(node.value) diff --git a/tests/helper_visitors/vars_visitor_test.py b/tests/helper_visitors/vars_visitor_test.py index f45e81c5..22dd6413 100644 --- a/tests/helper_visitors/vars_visitor_test.py +++ b/tests/helper_visitors/vars_visitor_test.py @@ -52,6 +52,13 @@ def test_call7(self): vars = 
self.perform_vars_on_expression("resp = make_response(html.replace.bar('{{ param }}', param))") self.assertEqual(vars.result, ['resp', 'ret_bar']) + def test_curried_function(self): + # Curried functions aren't supported really, but we now at least have a defined behaviour. + vars = self.perform_vars_on_expression('f(g.h(a)(b))') + self.assertCountEqual(vars.result, ['ret_h', 'b']) + vars = self.perform_vars_on_expression('f(g(a)(b)(c)(d, e=f))') + self.assertCountEqual(vars.result, ['ret_g', 'b', 'c', 'd', 'f']) + def test_keyword_vararg(self): vars = self.perform_vars_on_expression('print(arg = x)') self.assertEqual(vars.result, ['x']) From b0c32c0df3238979c4d8d182ffa6b26d396bba64 Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 6 Aug 2018 11:20:39 +0100 Subject: [PATCH 241/291] Handle assignment unpacking `a, b, c = d` We already handle a, b, c = d, *e, f a, b, c = d() But `a, b, c = d` prints 'Assignment not properly handled.' This can be handled exactly like `a, b, c = (*d,)`, where taint in value `d` is propagated to all targets. 
--- pyt/cfg/stmt_visitor.py | 20 ++++++++++++-------- tests/cfg/cfg_test.py | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 4acfea20..f4f9d855 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -326,11 +326,11 @@ def visit_Try(self, node): return ControlFlowNode(try_node, last_statements, break_statements=body.break_statements) - def assign_tuple_target(self, node, right_hand_side_variables): + def assign_tuple_target(self, target_nodes, value_nodes, right_hand_side_variables): new_assignment_nodes = [] remaining_variables = list(right_hand_side_variables) - remaining_targets = list(node.targets[0].elts) - remaining_values = list(node.value.elts) # May contain duplicates + remaining_targets = list(target_nodes) + remaining_values = list(value_nodes) # May contain duplicates def visit(target, value): label = LabelVisitor() @@ -339,7 +339,7 @@ def visit(target, value): rhs_visitor.visit(value) if isinstance(value, ast.Call): new_ast_node = ast.Assign(target, value) - ast.copy_location(new_ast_node, node) + ast.copy_location(new_ast_node, target) new_assignment_nodes.append(self.assignment_call_node(label.result, new_ast_node)) else: label.result += ' = ' @@ -349,7 +349,7 @@ def visit(target, value): extract_left_hand_side(target), ast.Assign(target, value), rhs_visitor.result, - line_number=node.lineno, + line_number=target.lineno, path=self.filenames[-1] ))) remaining_targets.remove(target) @@ -358,7 +358,7 @@ def visit(target, value): remaining_variables.remove(var) # Pair targets and values until a Starred node is reached - for target, value in zip(node.targets[0].elts, node.value.elts): + for target, value in zip(target_nodes, value_nodes): if isinstance(target, ast.Starred) or isinstance(value, ast.Starred): break visit(target, value) @@ -380,7 +380,7 @@ def visit(target, value): extract_left_hand_side(target), ast.Assign(target, remaining_values[0]), 
remaining_variables, - line_number=node.lineno, + line_number=target.lineno, path=self.filenames[-1] ))) @@ -413,7 +413,7 @@ def visit_Assign(self, node): rhs_visitor.visit(node.value) if isinstance(node.targets[0], (ast.Tuple, ast.List)): # x,y = [1,2] if isinstance(node.value, (ast.Tuple, ast.List)): - return self.assign_tuple_target(node, rhs_visitor.result) + return self.assign_tuple_target(node.targets[0].elts, node.value.elts, rhs_visitor.result) elif isinstance(node.value, ast.Call): call = None for element in node.targets[0].elts: @@ -421,6 +421,10 @@ def visit_Assign(self, node): label.visit(element) call = self.assignment_call_node(label.result, node) return call + elif isinstance(node.value, ast.Name): # Treat `x, y = z` like `x, y = (*z,)` + value_node = ast.Starred(node.value, ast.Load()) + ast.copy_location(value_node, node) + return self.assign_tuple_target(node.targets[0].elts, [value_node], rhs_visitor.result) else: label = LabelVisitor() label.visit(node) diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index cc2df076..2c98afc7 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -820,6 +820,21 @@ def test_assignment_starred_list(self): [('a', ['d']), ('b', ['d']), ('c', ['e'])], ) + def test_unpacking_to_tuple(self): + self.cfg_create_from_ast(ast.parse('a, b, c = d')) + + middle_nodes = self.cfg.nodes[1:-1] + self.assert_length(middle_nodes, expected_length=3) + + self.assertCountEqual( + [n.label for n in middle_nodes], + ['a, b, c = *d'] * 3, + ) + self.assertCountEqual( + [(n.left_hand_side, n.right_hand_side_variables) for n in middle_nodes], + [('a', ['d']), ('b', ['d']), ('c', ['d'])], + ) + def test_augmented_assignment(self): self.cfg_create_from_ast(ast.parse('a+=f(b,c)')) From 7e7b9f1401f265acafac530e3d34b8b49de45ab2 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 9 Aug 2018 18:16:08 -0700 Subject: [PATCH 242/291] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 19222a31..2aefd0ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,8 +69,8 @@ If you love PyT, please star our project on GitHub to show your support! :star: #### :telescope: Precision * Added per-arg taint, for sink functions ([#147], thanks [@bcaller]) +* AugAssign, Yield, and YieldFrom taint propagation improvements ([#155], thanks [@bcaller]) * Improved tuple assingment to be more precise and support starargs ([#150], thanks [@bcaller]) -* AugAssign, Yield, and YieldFrom taint propagation improvements ([#155], thanks [@bcaller]) [#147]: https://github.com/python-security/pyt/pull/147 [#150]: https://github.com/python-security/pyt/pull/150 From fe058b0457745b7a364b4546aeaacfe6e3a65b70 Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 13 Aug 2018 17:26:44 +0100 Subject: [PATCH 243/291] Deterministic file loading and vulnerability order os.walk is not deterministic (though often on the same computer it will walk in the same order). This means that the vulnerabilities appear in different orders on different machines, making it hard to compare output. Process files in alphabetical order. --- pyt/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 192eaf99..b11e995b 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -79,7 +79,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 nosec_lines = defaultdict(set) - for path in files: + for path in sorted(files): if not args.ignore_nosec: nosec_lines[path] = retrieve_nosec_lines(path) From d6ad59fedd6c6f8ef7fc343384a7490efa688a1d Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 13 Aug 2018 17:29:43 +0100 Subject: [PATCH 244/291] Don't repeat get_modules call if we use a project root The project_modules only need loading once if -pr is set, not once for every file. 
--- pyt/__main__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index b11e995b..d578d4bc 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -79,15 +79,18 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 nosec_lines = defaultdict(set) + if args.project_root: + directory = os.path.normpath(args.project_root) + project_modules = get_modules(directory, prepend_module_root=args.prepend_module_root) + for path in sorted(files): if not args.ignore_nosec: nosec_lines[path] = retrieve_nosec_lines(path) - if args.project_root: - directory = os.path.normpath(args.project_root) - else: + if not args.project_root: directory = os.path.dirname(path) - project_modules = get_modules(directory, prepend_module_root=args.prepend_module_root) + project_modules = get_modules(directory, prepend_module_root=args.prepend_module_root) + local_modules = get_directory_modules(directory) tree = generate_ast(path) From c4893e733f09b52c209f2516773e5eb8b82d4ca5 Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 15 Aug 2018 15:45:42 +0100 Subject: [PATCH 245/291] Remove extraneous reassignments in output The output should consist of the path from the source to the sink. Anything which happens after the source reaches the sink is irrelevant and just makes the output longer and confusing to interpret. None of the lines removed from the tests actually affected the vulnerability chain. Perhaps this should be dealt with somewhere in the definition_chain or vulnerability functions: here we just trim the chain upon reaching the sink in the vulnerability_helper. 
--- pyt/vulnerabilities/vulnerability_helper.py | 14 +++-- tests/vulnerabilities/vulnerabilities_test.py | 52 ------------------- 2 files changed, 6 insertions(+), 60 deletions(-) diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index 8c3b84a4..a89ce1bb 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -3,6 +3,7 @@ import json from enum import Enum from collections import namedtuple +from itertools import takewhile from ..core.node_types import YieldNode @@ -56,16 +57,13 @@ def __init__( self.sink = sink self.sink_trigger_word = sink_trigger_word - self.reassignment_nodes = reassignment_nodes - self._remove_sink_from_secondary_nodes() + # Remove the sink node and all nodes after the sink from the list of reassignments. + self.reassignment_nodes = list(takewhile( + lambda node: node is not sink, + reassignment_nodes + )) self._remove_non_propagating_yields() - def _remove_sink_from_secondary_nodes(self): - try: - self.reassignment_nodes.remove(self.sink) - except ValueError: # pragma: no cover - pass - def _remove_non_propagating_yields(self): """Remove yield with no variables e.g. 
`yield 123` and plain `yield` from vulnerability.""" for node in list(self.reassignment_nodes): diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 04267140..ff48c38c 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -150,12 +150,6 @@ def test_XSS_result(self): Reassigned in: File: examples/vulnerable_code/XSS.py > Line 6: param = ~call_1 - File: examples/vulnerable_code/XSS.py - > Line 9: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS.py - > Line 9: resp = ~call_3 - File: examples/vulnerable_code/XSS.py - > Line 10: ret_XSS1 = resp File: examples/vulnerable_code/XSS.py > reaches line 9, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', param) @@ -274,8 +268,6 @@ def test_path_traversal_sanitised_result(self): > Line 10: image_name = ~call_2 File: examples/vulnerable_code/path_traversal_sanitised.py > Line 12: ~call_4 = ret_os.path.join(~call_5, image_name) - File: examples/vulnerable_code/path_traversal_sanitised.py - > Line 12: ret_cat_picture = ~call_3 File: examples/vulnerable_code/path_traversal_sanitised.py > reaches line 12, sink "send_file(": ~call_3 = ret_send_file(~call_4) @@ -297,8 +289,6 @@ def test_path_traversal_sanitised_2_result(self): > Line 8: image_name = ~call_1 File: examples/vulnerable_code/path_traversal_sanitised_2.py > Line 12: ~call_3 = ret_os.path.join(~call_4, image_name) - File: examples/vulnerable_code/path_traversal_sanitised_2.py - > Line 12: ret_cat_picture = ~call_2 File: examples/vulnerable_code/path_traversal_sanitised_2.py > reaches line 12, sink "send_file(": ~call_2 = ret_send_file(~call_3) @@ -318,8 +308,6 @@ def test_sql_result(self): Reassigned in: File: examples/vulnerable_code/sql/sqli.py > Line 26: param = ~call_1 - File: examples/vulnerable_code/sql/sqli.py - > Line 27: result = ~call_2 File: examples/vulnerable_code/sql/sqli.py > reaches line 27, sink 
"execute(": ~call_2 = ret_db.engine.execute(param) @@ -335,13 +323,6 @@ def test_XSS_form_result(self): File: examples/vulnerable_code/XSS_form.py > User input at line 14, source "form[": data = request.form['my_text'] - Reassigned in: - File: examples/vulnerable_code/XSS_form.py - > Line 15: ~call_1 = ret_make_response(~call_2) - File: examples/vulnerable_code/XSS_form.py - > Line 15: resp = ~call_1 - File: examples/vulnerable_code/XSS_form.py - > Line 17: ret_example2_action = resp File: examples/vulnerable_code/XSS_form.py > reaches line 15, sink "replace(": ~call_2 = ret_html1.replace('{{ data }}', data) @@ -360,12 +341,6 @@ def test_XSS_url_result(self): Reassigned in: File: examples/vulnerable_code/XSS_url.py > Line 6: param = url - File: examples/vulnerable_code/XSS_url.py - > Line 9: ~call_2 = ret_make_response(~call_3) - File: examples/vulnerable_code/XSS_url.py - > Line 9: resp = ~call_2 - File: examples/vulnerable_code/XSS_url.py - > Line 10: ret_XSS1 = resp File: examples/vulnerable_code/XSS_url.py > reaches line 9, sink "replace(": ~call_3 = ret_html.replace('{{ param }}', param) @@ -390,12 +365,6 @@ def test_XSS_reassign_result(self): > Line 6: param = ~call_1 File: examples/vulnerable_code/XSS_reassign.py > Line 8: param = param + '' - File: examples/vulnerable_code/XSS_reassign.py - > Line 11: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS_reassign.py - > Line 11: resp = ~call_3 - File: examples/vulnerable_code/XSS_reassign.py - > Line 12: ret_XSS1 = resp File: examples/vulnerable_code/XSS_reassign.py > reaches line 11, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', param) @@ -418,12 +387,6 @@ def test_XSS_sanitised_result(self): > Line 9: ~call_2 = ret_Markup.escape(param) File: examples/vulnerable_code/XSS_sanitised.py > Line 9: param = ~call_2 - File: examples/vulnerable_code/XSS_sanitised.py - > Line 12: ~call_4 = ret_make_response(~call_5) - File: examples/vulnerable_code/XSS_sanitised.py - > Line 12: resp 
= ~call_4 - File: examples/vulnerable_code/XSS_sanitised.py - > Line 13: ret_XSS1 = resp File: examples/vulnerable_code/XSS_sanitised.py > reaches line 12, sink "replace(": ~call_5 = ret_html.replace('{{ param }}', param) @@ -449,12 +412,6 @@ def test_XSS_variable_assign_result(self): > Line 6: param = ~call_1 File: examples/vulnerable_code/XSS_variable_assign.py > Line 8: other_var = param + '' - File: examples/vulnerable_code/XSS_variable_assign.py - > Line 11: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS_variable_assign.py - > Line 11: resp = ~call_3 - File: examples/vulnerable_code/XSS_variable_assign.py - > Line 12: ret_XSS1 = resp File: examples/vulnerable_code/XSS_variable_assign.py > reaches line 11, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', other_var) @@ -479,12 +436,6 @@ def test_XSS_variable_multiple_assign_result(self): > Line 10: not_the_same_var = '' + other_var File: examples/vulnerable_code/XSS_variable_multiple_assign.py > Line 12: another_one = not_the_same_var + '' - File: examples/vulnerable_code/XSS_variable_multiple_assign.py - > Line 15: ~call_3 = ret_make_response(~call_4) - File: examples/vulnerable_code/XSS_variable_multiple_assign.py - > Line 15: resp = ~call_3 - File: examples/vulnerable_code/XSS_variable_multiple_assign.py - > Line 17: ret_XSS1 = resp File: examples/vulnerable_code/XSS_variable_multiple_assign.py > reaches line 15, sink "replace(": ~call_4 = ret_html.replace('{{ param }}', another_one) @@ -550,9 +501,6 @@ def test_django_view_param(self): File: examples/vulnerable_code/django_XSS.py > User input at line 4, source "Framework function URL parameter": param - Reassigned in: - File: examples/vulnerable_code/django_XSS.py - > Line 5: ret_xss1 = ~call_1 File: examples/vulnerable_code/django_XSS.py > reaches line 5, sink "render(": ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) From d2a8189738f067987504befb08304bd043e62323 Mon Sep 17 00:00:00 2001 From: KevinHock 
Date: Thu, 16 Aug 2018 11:55:30 -0700 Subject: [PATCH 246/291] Removed reference to UImode.NORMAL --- pyt/__main__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index d578d4bc..0cab673f 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -65,11 +65,9 @@ def retrieve_nosec_lines( def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) - ui_mode = UImode.NORMAL + ui_mode = UImode.TRIM if args.interactive: ui_mode = UImode.INTERACTIVE - elif args.trim_reassigned_in: - ui_mode = UImode.TRIM files = discover_files( args.targets, From 88113437e8fbd6e883bf30b9ae795cbd3a755d86 Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 16 Aug 2018 12:30:46 +0100 Subject: [PATCH 247/291] Taint propagates from methods of tainted objects Previously `x = TAINT.lower()` would be tainted (due to special handling for assignment_call_nodes) but `x = str(TAINT.lower())` wouldn't be tainted. To fix this, `TAINT` is added to the RHS variables of `TAINT.lower()`. This will mean that e.g. `request` will be a RHS variable of `request.get()`, but I think that will be OK. In the test which changed, the additional line is because resp has become tainted. 
However, this still leaves the following false negatives to fix another day: `assert_vulnerable('result = str("%s" % str(TAINT.lower()))') # FAILS` `assert_vulnerable('result = str("%s" % TAINT.lower().upper())') # FAILS` --- pyt/cfg/stmt_visitor.py | 27 +++++++++---------- tests/vulnerabilities/vulnerabilities_test.py | 19 +++++++++++-- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index de8e396e..00a0fb2f 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -11,6 +11,7 @@ ) from ..core.ast_helper import ( generate_ast, + get_call_names, get_call_names_as_string ) from ..core.module_definitions import ( @@ -472,14 +473,6 @@ def assignment_call_node(self, left_hand_label, ast_node): call = self.visit(ast_node.value) call_label = call.left_hand_side - if isinstance(call, BBorBInode): - # Necessary to know e.g. - # `image_name = image_name.replace('..', '')` - # is a reassignment. - vars_visitor = VarsVisitor() - vars_visitor.visit(ast_node.value) - call.right_hand_side_variables.extend(vars_visitor.result) - call_assignment = AssignmentCallNode( left_hand_label + ' = ' + call_label, left_hand_label, @@ -572,7 +565,7 @@ def visit_While(self, node): return self.loop_node_skeleton(test, node) - def add_blackbox_or_builtin_call(self, node, blackbox): + def add_blackbox_or_builtin_call(self, node, blackbox): # noqa: C901 """Processes a blackbox or builtin function when it is called. Nothing gets assigned to ret_func_foo in the builtin/blackbox case. @@ -597,14 +590,14 @@ def add_blackbox_or_builtin_call(self, node, blackbox): saved_function_call_index = self.function_call_index self.undecided = False - call_label = LabelVisitor() - call_label.visit(node) + call_label_visitor = LabelVisitor() + call_label_visitor.visit(node) - index = call_label.result.find('(') + call_function_label = call_label_visitor.result[:call_label_visitor.result.find('(')] # Create e.g. 
~call_1 = ret_func_foo LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index) - RHS = 'ret_' + call_label.result[:index] + '(' + RHS = 'ret_' + call_function_label + '(' call_node = BBorBInode( label='', @@ -613,7 +606,7 @@ def add_blackbox_or_builtin_call(self, node, blackbox): right_hand_side_variables=[], line_number=node.lineno, path=self.filenames[-1], - func_name=call_label.result[:index] + func_name=call_function_label ) visual_args = list() rhs_vars = list() @@ -657,6 +650,11 @@ def add_blackbox_or_builtin_call(self, node, blackbox): # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))` last_return_value_of_nested_call.connect(call_node) + call_names = list(get_call_names(node.func)) + if len(call_names) > 1: + # taint is a RHS variable (self) of taint.lower() + rhs_vars.append(call_names[0]) + if len(visual_args) > 0: for arg in visual_args: RHS = RHS + arg + ", " @@ -667,7 +665,6 @@ def add_blackbox_or_builtin_call(self, node, blackbox): call_node.label = LHS + " = " + RHS call_node.right_hand_side_variables = rhs_vars - # Used in get_sink_args, not using right_hand_side_variables because it is extended in assignment_call_node rhs_visitor = RHSVisitor() rhs_visitor.visit(node) call_node.args = rhs_visitor.result diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index ff48c38c..599ea4aa 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -111,8 +111,9 @@ def test_build_sanitiser_node_dict(self): self.assertEqual(sanitiser_dict['escape'][0], cfg.nodes[3]) - def run_analysis(self, path): - self.cfg_create_from_file(path) + def run_analysis(self, path=None): + if path: + self.cfg_create_from_file(path) cfg_list = [self.cfg] FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) @@ -468,6 +469,20 @@ def test_yield(self): self.assertAlphaEqual(str(vuln), EXPECTED_VULNERABILITY_DESCRIPTION) + def 
test_method_of_taint(self): + def assert_vulnerable(fixture): + tree = ast.parse('TAINT = request.args.get("")\n' + fixture + '\nexecute(result)') + self.cfg_create_from_ast(tree) + vulnerabilities = self.run_analysis() + self.assert_length(vulnerabilities, expected_length=1, msg=fixture) + + assert_vulnerable('result = TAINT') + assert_vulnerable('result = TAINT.lower()') + assert_vulnerable('result = str(TAINT)') + assert_vulnerable('result = str(TAINT.lower())') + assert_vulnerable('result = repr(str("%s" % TAINT.lower().upper()))') + assert_vulnerable('result = repr(str("{}".format(TAINT.lower())))') + class EngineDjangoTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): From b14a26799ff02902a6c517aba9a874484d3fada0 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 21 Aug 2018 19:02:07 -0700 Subject: [PATCH 248/291] Always define cfg_list --- pyt/__main__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyt/__main__.py b/pyt/__main__.py index 0cab673f..80188311 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -81,6 +81,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 directory = os.path.normpath(args.project_root) project_modules = get_modules(directory, prepend_module_root=args.prepend_module_root) + cfg_list = list() for path in sorted(files): if not args.ignore_nosec: nosec_lines[path] = retrieve_nosec_lines(path) From 3fc8046ffffe802f8498080f6a59a2087b8abed1 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 21 Aug 2018 19:03:55 -0700 Subject: [PATCH 249/291] [version] Bump to 0.39 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c6b082f4..9809fcdd 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.38' +VERSION = '0.39' setup( From 12619b787f32dc5f7c6f5dd840d072cee2aaba18 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 20 Aug 2018 09:34:02 -0700 Subject: [PATCH 250/291] Remove --trim option and UImode Enum 
Explicitly always make reassignment_nodes the vulnerability chain Fix tests after removing UImode Add comment to argparse hack Update root README.rst --- README.rst | 55 +++++++++-------- pyt/__main__.py | 14 ++--- pyt/cfg/stmt_visitor.py | 1 + pyt/usage.py | 60 ++++++++----------- pyt/vulnerabilities/__init__.py | 8 +-- pyt/vulnerabilities/vulnerabilities.py | 38 +++++------- pyt/vulnerabilities/vulnerability_helper.py | 13 +--- tests/usage_test.py | 39 ++++++------ .../vulnerabilities_across_files_test.py | 22 ++----- tests/vulnerabilities/vulnerabilities_test.py | 29 ++------- 10 files changed, 104 insertions(+), 175 deletions(-) diff --git a/README.rst b/README.rst index 5c44f15a..ad79c794 100644 --- a/README.rst +++ b/README.rst @@ -86,53 +86,56 @@ Usage .. code-block:: usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [-trim] [-i] - targets [targets ...] + [-b BASELINE_JSON_FILE] [-j] [-t TRIGGER_WORD_FILE] + [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] + [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] + [--dont-prepend-root] [--no-local-imports] + targets [targets ...] 
required arguments: - targets source file(s) or directory(s) to be tested + targets source file(s) or directory(s) to be scanned important optional arguments: -a ADAPTOR, --adaptor ADAPTOR - Choose a web framework adaptor: Flask(Default), - Django, Every or Pylons - + Choose a web framework adaptor: Flask(Default), + Django, Every or Pylons + -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE - Input file with a list of sources and sinks - + Input file with a list of sources and sinks + -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE - Input blackbox mapping file + Input blackbox mapping file optional arguments: -pr PROJECT_ROOT, --project-root PROJECT_ROOT - Add project root, only important when the entry file - is not at the root of the project + Add project root, only important when the entry file + is not at the root of the project. -b BASELINE_JSON_FILE, --baseline BASELINE_JSON_FILE - Path of a baseline report to compare against (only - JSON-formatted files are accepted) + Path of a baseline report to compare against (only + JSON-formatted files are accepted) + + -j, --json Prints JSON instead of report. - -j, --json Prints JSON instead of report + -i, --interactive Will ask you about each blackbox function call in + vulnerability chains. -o OUTPUT_FILE, --output OUTPUT_FILE - Write report to filename + Write report to filename - --ignore-nosec Do not skip lines with # nosec comments + --ignore-nosec Do not skip lines with # nosec comments - -r, --recursive Find and process files in subdirectories + -r, --recursive Find and process files in subdirectories -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS - Separate files with commas + Separate files with commas + --dont-prepend-root In project root e.g. /app, imports are not prepended + with app.* - print arguments: - -trim, --trim-reassigned-in - Trims the reassigned list to just the vulnerability - chain. 
- -i, --interactive Will ask you about each blackbox function call in - vulnerability chains. + --no-local-imports If set, absolute imports must be relative to the + project root. If not set, modules in the same + directory can be imported just by their names. Usage from Source ================= diff --git a/pyt/__main__.py b/pyt/__main__.py index 0cab673f..da24f7af 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -19,8 +19,7 @@ from .usage import parse_args from .vulnerabilities import ( find_vulnerabilities, - get_vulnerabilities_not_in_baseline, - UImode + get_vulnerabilities_not_in_baseline ) from .vulnerabilities.vulnerability_helper import SanitisedVulnerability from .web_frameworks import ( @@ -65,10 +64,6 @@ def retrieve_nosec_lines( def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) - ui_mode = UImode.TRIM - if args.interactive: - ui_mode = UImode.INTERACTIVE - files = discover_files( args.targets, args.excluded_paths, @@ -122,9 +117,9 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 analyse(cfg_list) vulnerabilities = find_vulnerabilities( cfg_list, - ui_mode, args.blackbox_mapping_file, args.trigger_word_file, + args.interactive, nosec_lines ) @@ -139,7 +134,10 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 else: text.report(vulnerabilities, args.output_file) - has_unsanitized_vulnerabilities = any(not isinstance(v, SanitisedVulnerability) for v in vulnerabilities) + has_unsanitized_vulnerabilities = any( + not isinstance(v, SanitisedVulnerability) + for v in vulnerabilities + ) if has_unsanitized_vulnerabilities: sys.exit(1) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 00a0fb2f..ce4d198b 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -665,6 +665,7 @@ def add_blackbox_or_builtin_call(self, node, blackbox): # noqa: C901 call_node.label = LHS + " = " + RHS call_node.right_hand_side_variables = rhs_vars + # Used in get_sink_args rhs_visitor = 
RHSVisitor() rhs_visitor.visit(node) call_node.args = rhs_visitor.result diff --git a/pyt/usage.py b/pyt/usage.py index 7325a7c8..d5b6efbe 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -20,8 +20,9 @@ def _add_required_group(parser): required_group = parser.add_argument_group('required arguments') required_group.add_argument( - 'targets', metavar='targets', type=str, nargs='+', - help='source file(s) or directory(s) to be tested' + 'targets', metavar='targets', nargs='+', + help='source file(s) or directory(s) to be scanned', + type=str ) @@ -54,6 +55,12 @@ def _add_optional_group(parser): action='http://github.com/store_true', default=False ) + optional_group.add_argument( + '-t', '--trigger-word-file', + help='Input file with a list of sources and sinks', + type=str, + default=default_trigger_word_file + ) optional_group.add_argument( '-m', '--blackbox-mapping-file', help='Input blackbox mapping file.', @@ -61,14 +68,14 @@ def _add_optional_group(parser): default=default_blackbox_mapping_file ) optional_group.add_argument( - '-t', '--trigger-word-file', - help='Input file with a list of sources and sinks', - type=str, - default=default_trigger_word_file + '-i', '--interactive', + help='Will ask you about each blackbox function call in vulnerability chains.', + action='http://github.com/store_true', + default=False ) optional_group.add_argument( '-o', '--output', - help='write report to filename', + help='Write report to filename', dest='output_file', action='http://github.com/store', type=argparse.FileType('w'), @@ -78,11 +85,13 @@ def _add_optional_group(parser): '--ignore-nosec', dest='ignore_nosec', action='http://github.com/store_true', - help='do not skip lines with # nosec comments' + help='Do not skip lines with # nosec comments' ) optional_group.add_argument( - '-r', '--recursive', dest='recursive', - action='http://github.com/store_true', help='find and process files in subdirectories' + '-r', '--recursive', 
+ dest='recursive', + action='http://github.com/store_true', + help='Find and process files in subdirectories' ) optional_group.add_argument( '-x', '--exclude', @@ -108,39 +117,18 @@ def _add_optional_group(parser): ) -def _add_print_group(parser): - print_group = parser.add_argument_group('print arguments') - print_group.add_argument( - '-trim', '--trim-reassigned-in', - help='Trims the reassigned list to just the vulnerability chain.', - action='http://github.com/store_true', - default=True - ) - print_group.add_argument( - '-i', '--interactive', - help='Will ask you about each blackbox function call in vulnerability chains.', - action='http://github.com/store_true', - default=False - ) - - -def _check_required_and_mutually_exclusive_args(parser, args): - if args.targets is None: - parser.error('The targets argument is required') - - def parse_args(args): if len(args) == 0: args.append('-h') parser = argparse.ArgumentParser(prog='python -m pyt') + + # Hack to in order to list required args above optional parser._action_groups.pop() + _add_required_group(parser) _add_optional_group(parser) - _add_print_group(parser) args = parser.parse_args(args) - _check_required_and_mutually_exclusive_args( - parser, - args - ) + if args.targets is None: + parser.error('The targets argument is required') return args diff --git a/pyt/vulnerabilities/__init__.py b/pyt/vulnerabilities/__init__.py index 992af18a..1dd115de 100644 --- a/pyt/vulnerabilities/__init__.py +++ b/pyt/vulnerabilities/__init__.py @@ -1,12 +1,8 @@ from .vulnerabilities import find_vulnerabilities -from .vulnerability_helper import ( - get_vulnerabilities_not_in_baseline, - UImode -) +from .vulnerability_helper import get_vulnerabilities_not_in_baseline __all__ = [ 'find_vulnerabilities', - 'get_vulnerabilities_not_in_baseline', - 'UImode' + 'get_vulnerabilities_not_in_baseline' ] diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 
f169c4b2..47986ed8 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -23,8 +23,7 @@ TriggerNode, Triggers, vuln_factory, - VulnerabilityType, - UImode + VulnerabilityType ) @@ -306,7 +305,7 @@ def how_vulnerable( sanitiser_nodes, potential_sanitiser, blackbox_assignments, - ui_mode, + interactive, vuln_deets ): """Iterates through the chain of nodes and checks the blackbox nodes against the blackbox mapping and sanitiser dictionary. @@ -320,7 +319,7 @@ def how_vulnerable( sanitiser_nodes(set): A set of nodes that are sanitisers for the sink. potential_sanitiser(Node): An if or elif node that can potentially cause sanitisation. blackbox_assignments(set[AssignmentNode]): set of blackbox assignments, includes the ReturnNode's of BBorBInode's. - ui_mode(UImode): determines if we interact with the user when we don't already have a blackbox mapping available. + interactive(bool): determines if we ask the user about blackbox functions not in the mapping file. vuln_deets(dict): vulnerability details. Returns: @@ -337,7 +336,7 @@ def how_vulnerable( continue elif current_node.func_name in blackbox_mapping['does_not_propagate']: return VulnerabilityType.FALSE - elif ui_mode == UImode.INTERACTIVE: + elif interactive: user_says = input( 'Is the return value of {} with tainted argument "{}" vulnerable? (Y/n)'.format( current_node.label, @@ -378,7 +377,7 @@ def get_vulnerability( triggers, lattice, cfg, - ui_mode, + interactive, blackbox_mapping ): """Get vulnerability between source and sink if it exists. @@ -395,7 +394,7 @@ def get_vulnerability( triggers(Triggers): Triggers of the CFG. lattice(Lattice): the lattice we're analysing. cfg(CFG): .blackbox_assignments used in is_unknown, .nodes used in build_def_use_chain - ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. + interactive(bool): determines if we ask the user about blackbox functions not in the mapping file. 
blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. Returns: @@ -421,8 +420,7 @@ def get_vulnerability( 'source': source.cfg_node, 'source_trigger_word': source.trigger_word, 'sink': sink.cfg_node, - 'sink_trigger_word': sink.trigger_word, - 'reassignment_nodes': source.secondary_nodes + 'sink_trigger_word': sink.trigger_word } sanitiser_nodes = set() @@ -450,14 +448,13 @@ def get_vulnerability( sanitiser_nodes, potential_sanitiser, cfg.blackbox_assignments, - ui_mode, + interactive, vuln_deets ) if vulnerability_type == VulnerabilityType.FALSE: continue - if ui_mode != UImode.NORMAL: - vuln_deets['reassignment_nodes'] = chain + vuln_deets['reassignment_nodes'] = chain return vuln_factory(vulnerability_type)(**vuln_deets) @@ -468,9 +465,9 @@ def find_vulnerabilities_in_cfg( cfg, definitions, lattice, - ui_mode, blackbox_mapping, vulnerabilities_list, + interactive, nosec_lines ): """Find vulnerabilities in a cfg. @@ -479,10 +476,9 @@ def find_vulnerabilities_in_cfg( cfg(CFG): The CFG to find vulnerabilities in. definitions(trigger_definitions_parser.Definitions): Source and sink definitions. lattice(Lattice): the lattice we're analysing. - ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. vulnerabilities_list(list): That we append to when we find vulnerabilities. - nosec_lines(dict): filenames mapped to their nosec lines + interactive(bool): determines if we ask the user about blackbox functions not in the mapping file. 
""" triggers = identify_triggers( cfg, @@ -499,7 +495,7 @@ def find_vulnerabilities_in_cfg( triggers, lattice, cfg, - ui_mode, + interactive, blackbox_mapping ) if vulnerability: @@ -508,20 +504,18 @@ def find_vulnerabilities_in_cfg( def find_vulnerabilities( cfg_list, - ui_mode, blackbox_mapping_file, sources_and_sinks_file, + interactive=False, nosec_lines=defaultdict(set) ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. Args: cfg_list(list[CFG]): the list of CFGs to scan. - ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. blackbox_mapping_file(str) sources_and_sinks_file(str) - nosec_lines(dict): filenames mapped to their nosec lines - + interactive(bool): determines if we ask the user about blackbox functions not in the mapping file. Returns: A list of vulnerabilities. """ @@ -535,13 +529,13 @@ def find_vulnerabilities( cfg, definitions, Lattice(cfg.nodes), - ui_mode, blackbox_mapping, vulnerabilities, + interactive, nosec_lines ) - if ui_mode == UImode.INTERACTIVE: + if interactive: with open(blackbox_mapping_file, 'w') as outfile: json.dump(blackbox_mapping, outfile, indent=4) diff --git a/pyt/vulnerabilities/vulnerability_helper.py b/pyt/vulnerabilities/vulnerability_helper.py index a89ce1bb..ee22622f 100644 --- a/pyt/vulnerabilities/vulnerability_helper.py +++ b/pyt/vulnerabilities/vulnerability_helper.py @@ -3,7 +3,6 @@ import json from enum import Enum from collections import namedtuple -from itertools import takewhile from ..core.node_types import YieldNode @@ -15,12 +14,6 @@ class VulnerabilityType(Enum): UNKNOWN = 3 -class UImode(Enum): - INTERACTIVE = 0 - NORMAL = 1 - TRIM = 2 - - def vuln_factory(vulnerability_type): if vulnerability_type == VulnerabilityType.UNKNOWN: return UnknownVulnerability @@ -57,11 +50,7 @@ def __init__( self.sink = sink self.sink_trigger_word = sink_trigger_word - # Remove the sink node and all nodes after the sink from the list of reassignments. 
- self.reassignment_nodes = list(takewhile( - lambda node: node is not sink, - reassignment_nodes - )) + self.reassignment_nodes = reassignment_nodes self._remove_non_propagating_yields() def _remove_non_propagating_yields(self): diff --git a/tests/usage_test.py b/tests/usage_test.py index 027c6f00..363786bd 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -26,14 +26,14 @@ def test_no_args(self): self.maxDiff = None EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] - [--no-local-imports] [-trim] [-i] + [-b BASELINE_JSON_FILE] [-j] [-t TRIGGER_WORD_FILE] + [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] + [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] + [--dont-prepend-root] [--no-local-imports] targets [targets ...] required arguments: - targets source file(s) or directory(s) to be tested + targets source file(s) or directory(s) to be scanned optional arguments: -a ADAPTOR, --adaptor ADAPTOR @@ -46,28 +46,23 @@ def test_no_args(self): Path of a baseline report to compare against (only JSON-formatted files are accepted) -j, --json Prints JSON instead of report. - -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE - Input blackbox mapping file. -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE Input file with a list of sources and sinks + -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE + Input blackbox mapping file. + -i, --interactive Will ask you about each blackbox function call in + vulnerability chains. 
-o OUTPUT_FILE, --output OUTPUT_FILE - write report to filename - --ignore-nosec do not skip lines with # nosec comments - -r, --recursive find and process files in subdirectories + Write report to filename + --ignore-nosec Do not skip lines with # nosec comments + -r, --recursive Find and process files in subdirectories -x EXCLUDED_PATHS, --exclude EXCLUDED_PATHS Separate files with commas --dont-prepend-root In project root e.g. /app, imports are not prepended with app.* --no-local-imports If set, absolute imports must be relative to the project root. If not set, modules in the same - directory can be imported just by their names. - -print arguments: - -trim, --trim-reassigned-in - Trims the reassigned list to just the vulnerability - chain. - -i, --interactive Will ask you about each blackbox function call in - vulnerability chains.\n""" + directory can be imported just by their names.\n""" self.assertEqual(stdout.getvalue(), EXPECTED) @@ -77,10 +72,10 @@ def test_valid_args_but_no_targets(self): parse_args(['-j']) EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-m BLACKBOX_MAPPING_FILE] - [-t TRIGGER_WORD_FILE] [-o OUTPUT_FILE] [--ignore-nosec] - [-r] [-x EXCLUDED_PATHS] [--dont-prepend-root] - [--no-local-imports] [-trim] [-i] + [-b BASELINE_JSON_FILE] [-j] [-t TRIGGER_WORD_FILE] + [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] + [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] + [--dont-prepend-root] [--no-local-imports] targets [targets ...] 
python -m pyt: error: the following arguments are required: targets\n""" diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index 70529f0d..bd63b190 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -12,10 +12,7 @@ default_blackbox_mapping_file, default_trigger_word_file ) -from pyt.vulnerabilities import ( - find_vulnerabilities, - UImode -) +from pyt.vulnerabilities import find_vulnerabilities from pyt.web_frameworks import ( FrameworkAdaptor, is_flask_route_function @@ -41,7 +38,6 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, - UImode.NORMAL, default_blackbox_mapping_file, default_trigger_word_file ) @@ -92,7 +88,7 @@ def test_builtin_with_user_defined_inner(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/builtin_with_user_defined_inner.py > User input at line 16, source "form[": - req_param = request.form['suggestion'] + req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/builtin_with_user_defined_inner.py > Line 10: save_2_req_param = req_param @@ -104,8 +100,6 @@ def test_builtin_with_user_defined_inner(self): > Line 11: yes_vuln = inner_arg + 'hey' File: examples/nested_functions_code/builtin_with_user_defined_inner.py > Line 12: ret_inner = yes_vuln - File: examples/nested_functions_code/builtin_with_user_defined_inner.py - > Line 10: req_param = save_2_req_param File: examples/nested_functions_code/builtin_with_user_defined_inner.py > Line 19: ~call_2 = ret_inner File: examples/nested_functions_code/builtin_with_user_defined_inner.py @@ -167,7 +161,7 @@ def test_sink_with_result_of_user_defined_nested(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > User input at line 16, source "form[": - req_param = 
request.form['suggestion'] + req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > Line 6: save_1_req_param = req_param @@ -181,8 +175,6 @@ def test_sink_with_result_of_user_defined_nested(self): > Line 11: inner_ret_val = inner_arg + 'hey' File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > Line 12: ret_inner = inner_ret_val - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 10: req_param = save_2_req_param File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > Line 17: ~call_2 = ret_inner File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py @@ -193,8 +185,6 @@ def test_sink_with_result_of_user_defined_nested(self): > Line 7: outer_ret_val = outer_arg + 'hey' File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > Line 8: ret_outer = outer_ret_val - File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py - > Line 6: req_param = save_1_req_param File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py > Line 17: ~call_1 = ret_outer File: examples/nested_functions_code/sink_with_result_of_user_defined_nested.py @@ -250,7 +240,7 @@ def test_sink_with_user_defined_inner(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/nested_functions_code/sink_with_user_defined_inner.py > User input at line 16, source "form[": - req_param = request.form['suggestion'] + req_param = request.form['suggestion'] Reassigned in: File: examples/nested_functions_code/sink_with_user_defined_inner.py > Line 6: save_2_req_param = req_param @@ -264,8 +254,6 @@ def test_sink_with_user_defined_inner(self): > Line 11: inner_ret_val = inner_arg + 'hey' File: examples/nested_functions_code/sink_with_user_defined_inner.py > Line 12: ret_inner = inner_ret_val - File: 
examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 10: req_param = save_3_req_param File: examples/nested_functions_code/sink_with_user_defined_inner.py > Line 18: ~call_3 = ret_inner File: examples/nested_functions_code/sink_with_user_defined_inner.py @@ -276,8 +264,6 @@ def test_sink_with_user_defined_inner(self): > Line 7: outer_ret_val = outer_arg + 'hey' File: examples/nested_functions_code/sink_with_user_defined_inner.py > Line 8: ret_outer = outer_ret_val - File: examples/nested_functions_code/sink_with_user_defined_inner.py - > Line 6: req_param = save_2_req_param File: examples/nested_functions_code/sink_with_user_defined_inner.py > Line 18: ~call_2 = ret_outer File: examples/nested_functions_code/sink_with_user_defined_inner.py diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 599ea4aa..607fbb0c 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -12,7 +12,6 @@ ) from pyt.vulnerabilities import ( find_vulnerabilities, - UImode, vulnerabilities ) from pyt.vulnerabilities.trigger_definitions_parser import ( @@ -123,7 +122,6 @@ def run_analysis(self, path=None): return find_vulnerabilities( cfg_list, - UImode.NORMAL, default_blackbox_mapping_file, default_trigger_word_file ) @@ -201,8 +199,6 @@ def test_path_traversal_result(self): > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg File: examples/vulnerable_code/path_traversal.py > Line 8: ret_outer = outer_ret_val - File: examples/vulnerable_code/path_traversal.py - > Line 6: image_name = save_2_image_name File: examples/vulnerable_code/path_traversal.py > Line 19: ~call_2 = ret_outer File: examples/vulnerable_code/path_traversal.py @@ -229,28 +225,14 @@ def test_ensure_saved_scope(self): > Line 6: save_2_image_name = image_name File: examples/vulnerable_code/ensure_saved_scope.py > Line 10: save_3_image_name = image_name - File: 
examples/vulnerable_code/ensure_saved_scope.py - > Line 10: image_name = save_3_image_name - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 19: temp_2_other_arg = image_name - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 6: other_arg = temp_2_other_arg - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 8: ret_outer = outer_ret_val - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 6: image_name = save_2_image_name - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 19: ~call_2 = ret_outer - File: examples/vulnerable_code/ensure_saved_scope.py - > Line 19: foo = ~call_2 File: examples/vulnerable_code/ensure_saved_scope.py > reaches line 20, sink "send_file(": ~call_4 = ret_send_file(image_name) """ - - self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) + self.assertAlphaEqual( + vulnerability_description, + EXPECTED_VULNERABILITY_DESCRIPTION + ) def test_path_traversal_sanitised_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/path_traversal_sanitised.py') @@ -502,7 +484,6 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, - UImode.NORMAL, default_blackbox_mapping_file, trigger_word_file ) @@ -541,7 +522,6 @@ def run_analysis(self, path): return find_vulnerabilities( cfg_list, - UImode.NORMAL, default_blackbox_mapping_file, trigger_word_file ) @@ -568,7 +548,6 @@ def run_analysis(self): return find_vulnerabilities( cfg_list, - UImode.NORMAL, default_blackbox_mapping_file, trigger_word_file ) From 2e91ce7606f93011f9257f0a59e3e01e6919a30b Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 30 Aug 2018 15:48:12 +0100 Subject: [PATCH 251/291] Chained function calls separated into multiple assignments Take the example from examples/vulnerable_code/sql/sqli.py: `result = 
session.query(User).filter("username={}".format(TAINT))` The `filter` function is marked as a sink. However, previously this did not get marked as a vulnerability. The call label used to be `session.query`, ignoring the filter function. Now, when the file is read, it is transformed into 2 lines: ``` __chain_tmp_1 = session.query(User) result = __chain_tmp_1.filter("username={}".format(TAINT)) ``` And the vulnerability is found. We don't find everything here: just ordinary assignments and return statements. We can't just transform all Call nodes here since Call nodes can appear in many different scenarios e.g. comprehensions, bare function calls. --- pyt/core/ast_helper.py | 4 +- pyt/core/transformer.py | 52 ++++++++++++++++++- tests/base_test_case.py | 3 +- tests/cfg/cfg_test.py | 32 ++++++++++++ tests/core/transformer_test.py | 22 +++++++- tests/vulnerabilities/vulnerabilities_test.py | 2 +- 6 files changed, 108 insertions(+), 7 deletions(-) diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index 9a16267c..4ca1ca69 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -6,7 +6,7 @@ import subprocess from functools import lru_cache -from .transformer import AsyncTransformer +from .transformer import PytTransformer BLACK_LISTED_CALL_NAMES = ['self'] @@ -35,7 +35,7 @@ def generate_ast(path): with open(path, 'r') as f: try: tree = ast.parse(f.read()) - return AsyncTransformer().visit(tree) + return PytTransformer().visit(tree) except SyntaxError: # pragma: no cover global recursive if not recursive: diff --git a/pyt/core/transformer.py b/pyt/core/transformer.py index 12051c89..ccdd82c4 100644 --- a/pyt/core/transformer.py +++ b/pyt/core/transformer.py @@ -1,7 +1,7 @@ import ast -class AsyncTransformer(ast.NodeTransformer): +class AsyncTransformer(): """Converts all async nodes into their synchronous counterparts.""" def visit_Await(self, node): @@ -16,3 +16,53 @@ def visit_AsyncFor(self, node): def visit_AsyncWith(self, node): return 
self.visit(ast.With(**node.__dict__)) + + +class ChainedFunctionTransformer(): + def visit_chain(self, node, depth=1): + if ( + isinstance(node.value, ast.Call) and + isinstance(node.value.func, ast.Attribute) and + isinstance(node.value.func.value, ast.Call) + ): + # Node is assignment or return with value like `b.c().d()` + call_node = node.value + # If we want to handle nested functions in future, depth needs fixing + temp_var_id = '__chain_tmp_{}'.format(depth) + # AST tree is from right to left, so d() is the outer Call and b.c() is the inner Call + unvisited_inner_call = ast.Assign( + targets=[ast.Name(id=temp_var_id, ctx=ast.Store())], + value=call_node.func.value, + ) + ast.copy_location(unvisited_inner_call, node) + inner_calls = self.visit_chain(unvisited_inner_call, depth + 1) + for inner_call_node in inner_calls: + ast.copy_location(inner_call_node, node) + outer_call = self.generic_visit(type(node)( + value=ast.Call( + func=ast.Attribute( + value=ast.Name(id=temp_var_id, ctx=ast.Load()), + attr=call_node.func.attr, + ctx=ast.Load(), + ), + args=call_node.args, + keywords=call_node.keywords, + ), + **{field: value for field, value in ast.iter_fields(node) if field != 'value'} # e.g. 
targets + )) + ast.copy_location(outer_call, node) + ast.copy_location(outer_call.value, node) + ast.copy_location(outer_call.value.func, node) + return [*inner_calls, outer_call] + else: + return [self.generic_visit(node)] + + def visit_Assign(self, node): + return self.visit_chain(node) + + def visit_Return(self, node): + return self.visit_chain(node) + + +class PytTransformer(AsyncTransformer, ChainedFunctionTransformer, ast.NodeTransformer): + pass diff --git a/tests/base_test_case.py b/tests/base_test_case.py index 1283bf47..bd471073 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -4,6 +4,7 @@ from pyt.cfg import make_cfg from pyt.core.ast_helper import generate_ast from pyt.core.module_definitions import project_definitions +from pyt.core.transformer import PytTransformer class BaseTestCase(unittest.TestCase): @@ -36,7 +37,7 @@ def cfg_create_from_ast( ): project_definitions.clear() self.cfg = make_cfg( - ast_tree, + PytTransformer().visit(ast_tree), project_modules, local_modules, filename='?' 
diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index 2c98afc7..3c215983 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -1497,3 +1497,35 @@ def test_name_for(self): self.assert_length(self.cfg.nodes, expected_length=4) self.assertEqual(self.cfg.nodes[1].label, 'for x in l:') + + +class CFGFunctionChain(CFGBaseTestCase): + def test_simple(self): + self.cfg_create_from_ast( + ast.parse('a = b.c(z)') + ) + middle_nodes = self.cfg.nodes[1:-1] + self.assert_length(middle_nodes, expected_length=2) + self.assertEqual(middle_nodes[0].label, '~call_1 = ret_b.c(z)') + self.assertEqual(middle_nodes[0].func_name, 'b.c') + self.assertCountEqual(middle_nodes[0].right_hand_side_variables, ['z', 'b']) + + def test_chain(self): + self.cfg_create_from_ast( + ast.parse('a = b.xxx.c(z).d(y)') + ) + middle_nodes = self.cfg.nodes[1:-1] + self.assert_length(middle_nodes, expected_length=4) + + self.assertEqual(middle_nodes[0].left_hand_side, '~call_1') + self.assertCountEqual(middle_nodes[0].right_hand_side_variables, ['b', 'z']) + self.assertEqual(middle_nodes[0].label, '~call_1 = ret_b.xxx.c(z)') + + self.assertEqual(middle_nodes[1].left_hand_side, '__chain_tmp_1') + self.assertCountEqual(middle_nodes[1].right_hand_side_variables, ['~call_1']) + + self.assertEqual(middle_nodes[2].left_hand_side, '~call_2') + self.assertCountEqual(middle_nodes[2].right_hand_side_variables, ['__chain_tmp_1', 'y']) + + self.assertEqual(middle_nodes[3].left_hand_side, 'a') + self.assertCountEqual(middle_nodes[3].right_hand_side_variables, ['~call_2']) diff --git a/tests/core/transformer_test.py b/tests/core/transformer_test.py index 8233287b..2676c70d 100644 --- a/tests/core/transformer_test.py +++ b/tests/core/transformer_test.py @@ -1,13 +1,14 @@ import ast import unittest -from pyt.core.transformer import AsyncTransformer +from pyt.core.transformer import PytTransformer class TransformerTest(unittest.TestCase): """Tests for the AsyncTransformer.""" def 
test_async_removed_by_transformer(self): + self.maxDiff = 99999 async_tree = ast.parse("\n".join([ "async def a():", " async for b in c():", @@ -30,7 +31,24 @@ def test_async_removed_by_transformer(self): ])) self.assertIsInstance(sync_tree.body[0], ast.FunctionDef) - transformed = AsyncTransformer().visit(async_tree) + transformed = PytTransformer().visit(async_tree) self.assertIsInstance(transformed.body[0], ast.FunctionDef) self.assertEqual(ast.dump(transformed), ast.dump(sync_tree)) + + def test_chained_function(self): + chained_tree = ast.parse("\n".join([ + "def a():", + " b = c.d(e).f(g).h(i).j(k)", + ])) + + separated_tree = ast.parse("\n".join([ + "def a():", + " __chain_tmp_3 = c.d(e)", + " __chain_tmp_2 = __chain_tmp_3.f(g)", + " __chain_tmp_1 = __chain_tmp_2.h(i)", + " b = __chain_tmp_1.j(k)", + ])) + + transformed = PytTransformer().visit(chained_tree) + self.assertEqual(ast.dump(transformed), ast.dump(separated_tree)) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 607fbb0c..52fbfb2c 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -282,7 +282,7 @@ def test_path_traversal_sanitised_2_result(self): def test_sql_result(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/sql/sqli.py') - self.assert_length(vulnerabilities, expected_length=2) + self.assert_length(vulnerabilities, expected_length=3) vulnerability_description = str(vulnerabilities[0]) EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/sql/sqli.py From 2bc84138c8e1bfec82f8cf33aeb99f472f5cdfa2 Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 5 Sep 2018 14:47:46 +0100 Subject: [PATCH 252/291] Add colourful formatter "screen" Prints vulnerabilities with ANSI colour codes for the terminal. Not crazily colourful: just tries to highlight the important stuff. Repeated filenames aren't printed. 
Colour scheme might not be to everyone's taste. --- .coveragerc | 1 + pyt/formatters/screen.py | 104 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 pyt/formatters/screen.py diff --git a/.coveragerc b/.coveragerc index df3137cf..26a84f47 100644 --- a/.coveragerc +++ b/.coveragerc @@ -15,4 +15,5 @@ source = ./tests omit = pyt/formatters/json.py + pyt/formatters/screen.py pyt/formatters/text.py diff --git a/pyt/formatters/screen.py b/pyt/formatters/screen.py new file mode 100644 index 00000000..3a8ddb80 --- /dev/null +++ b/pyt/formatters/screen.py @@ -0,0 +1,104 @@ +"""This formatter outputs the issues as color-coded text.""" +from ..vulnerabilities.vulnerability_helper import SanitisedVulnerability, UnknownVulnerability + +RESET = '\033[0m' +BOLD = '\033[1m' +UNDERLINE = '\033[4m' +DANGER = '\033[31m' +GOOD = '\033[32m' +HIGHLIGHT = '\033[45;1m' +RED_ON_WHITE = '\033[31m\033[107m' + + +def color(string, color_string): + return color_string + str(string) + RESET + + +def report( + vulnerabilities, + fileobj, + print_sanitised, +): + """ + Prints issues in color-coded text format. 
+ + Args: + vulnerabilities: list of vulnerabilities to report + fileobj: The output file object, which may be sys.stdout + """ + n_vulnerabilities = len(vulnerabilities) + unsanitised_vulnerabilities = [v for v in vulnerabilities if not isinstance(v, SanitisedVulnerability)] + n_unsanitised = len(unsanitised_vulnerabilities) + n_sanitised = n_vulnerabilities - n_unsanitised + heading = "{} vulnerabilit{} found{}.\n".format( + 'No' if n_unsanitised == 0 else n_unsanitised, + 'y' if n_unsanitised == 1 else 'ies', + " (plus {} sanitised)".format(n_sanitised) if n_sanitised else "", + ) + vulnerabilities_to_print = vulnerabilities if print_sanitised else unsanitised_vulnerabilities + with fileobj: + for i, vulnerability in enumerate(vulnerabilities_to_print, start=1): + fileobj.write(vulnerability_to_str(i, vulnerability)) + + if n_unsanitised == 0: + fileobj.write(color(heading, GOOD)) + else: + fileobj.write(color(heading, DANGER)) + + +def vulnerability_to_str(i, vulnerability): + lines = [] + lines.append(color('Vulnerability {}'.format(i), UNDERLINE)) + lines.append('File: {}'.format(color(vulnerability.source.path, BOLD))) + lines.append( + 'User input at line {}, source "{}":'.format( + vulnerability.source.line_number, + color(vulnerability.source_trigger_word, HIGHLIGHT), + ) + ) + lines.append('\t{}'.format(color(vulnerability.source.label, RED_ON_WHITE))) + if vulnerability.reassignment_nodes: + previous_path = None + lines.append('Reassigned in:') + for node in vulnerability.reassignment_nodes: + if node.path != previous_path: + lines.append('\tFile: {}'.format(node.path)) + previous_path = node.path + label = node.label + if ( + isinstance(vulnerability, SanitisedVulnerability) and + node.label == vulnerability.sanitiser.label + ): + label = color(label, GOOD) + lines.append( + '\t Line {}:\t{}'.format( + node.line_number, + label, + ) + ) + if vulnerability.source.path != vulnerability.sink.path: + lines.append('File: 
{}'.format(color(vulnerability.sink.path, BOLD))) + lines.append( + 'Reaches line {}, sink "{}"'.format( + vulnerability.sink.line_number, + color(vulnerability.sink_trigger_word, HIGHLIGHT), + ) + ) + lines.append('\t{}'.format( + color(vulnerability.sink.label, RED_ON_WHITE) + )) + if isinstance(vulnerability, SanitisedVulnerability): + lines.append( + 'This vulnerability is {}{} by {}'.format( + color('potentially ', BOLD) if not vulnerability.confident else '', + color('sanitised', GOOD), + color(vulnerability.sanitiser.label, BOLD), + ) + ) + elif isinstance(vulnerability, UnknownVulnerability): + lines.append( + 'This vulnerability is unknown due to "{}"'.format( + color(vulnerability.unknown_assignment.label, BOLD), + ) + ) + return '\n'.join(lines) + '\n\n' From 4f80f97286cdfc5b32488a04eec7ecb49019297a Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 5 Sep 2018 14:50:41 +0100 Subject: [PATCH 253/291] Consistent spelling of sanitise --- pyt/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 558f1d3c..4bea43d2 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -135,11 +135,11 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 else: text.report(vulnerabilities, args.output_file) - has_unsanitized_vulnerabilities = any( + has_unsanitised_vulnerabilities = any( not isinstance(v, SanitisedVulnerability) for v in vulnerabilities ) - if has_unsanitized_vulnerabilities: + if has_unsanitised_vulnerabilities: sys.exit(1) From c07551dbcfcd369570ff9e1dfadafa18e8acc060 Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 5 Sep 2018 14:48:56 +0100 Subject: [PATCH 254/291] Add --only-unsanitised flag to not print sanitised vulnerabilities It is sometimes what you want, but often you just want the failures without sanitised vulns in the output. 
--- pyt/__main__.py | 9 +-------- pyt/formatters/json.py | 11 ++++++++--- pyt/formatters/text.py | 26 +++++++++++++++++--------- pyt/usage.py | 30 ++++++++++++++++++++++++------ tests/main_test.py | 33 +++++++++++++++++++++------------ tests/usage_test.py | 15 +++++++++------ 6 files changed, 80 insertions(+), 44 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 4bea43d2..3e6a9911 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -12,10 +12,6 @@ get_directory_modules, get_modules ) -from .formatters import ( - json, - text -) from .usage import parse_args from .vulnerabilities import ( find_vulnerabilities, @@ -130,10 +126,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 args.baseline ) - if args.json: - json.report(vulnerabilities, args.output_file) - else: - text.report(vulnerabilities, args.output_file) + args.formatter.report(vulnerabilities, args.output_file, not args.only_unsanitised) has_unsanitised_vulnerabilities = any( not isinstance(v, SanitisedVulnerability) diff --git a/pyt/formatters/json.py b/pyt/formatters/json.py index efc95b95..8e0eab11 100644 --- a/pyt/formatters/json.py +++ b/pyt/formatters/json.py @@ -1,12 +1,14 @@ """This formatter outputs the issues in JSON.""" - import json from datetime import datetime +from ..vulnerabilities.vulnerability_helper import SanitisedVulnerability + def report( vulnerabilities, - fileobj + fileobj, + print_sanitised, ): """ Prints issues in JSON format. 
@@ -19,7 +21,10 @@ def report( machine_output = { 'generated_at': time_string, - 'vulnerabilities': [vuln.as_dict() for vuln in vulnerabilities] + 'vulnerabilities': [ + vuln.as_dict() for vuln in vulnerabilities + if print_sanitised or not isinstance(vuln, SanitisedVulnerability) + ] } result = json.dumps( diff --git a/pyt/formatters/text.py b/pyt/formatters/text.py index 7961b05e..2041e006 100644 --- a/pyt/formatters/text.py +++ b/pyt/formatters/text.py @@ -1,9 +1,11 @@ """This formatter outputs the issues as plain text.""" +from ..vulnerabilities.vulnerability_helper import SanitisedVulnerability def report( vulnerabilities, - fileobj + fileobj, + print_sanitised, ): """ Prints issues in text format. @@ -11,15 +13,21 @@ def report( Args: vulnerabilities: list of vulnerabilities to report fileobj: The output file object, which may be sys.stdout + print_sanitised: Print just unsanitised vulnerabilities or sanitised vulnerabilities as well """ - number_of_vulnerabilities = len(vulnerabilities) + n_vulnerabilities = len(vulnerabilities) + unsanitised_vulnerabilities = [v for v in vulnerabilities if not isinstance(v, SanitisedVulnerability)] + n_unsanitised = len(unsanitised_vulnerabilities) + n_sanitised = n_vulnerabilities - n_unsanitised + heading = "{} vulnerabilit{} found{}{}\n".format( + 'No' if n_unsanitised == 0 else n_unsanitised, + 'y' if n_unsanitised == 1 else 'ies', + " (plus {} sanitised)".format(n_sanitised) if n_sanitised else "", + ':' if n_vulnerabilities else '.', + ) + vulnerabilities_to_print = vulnerabilities if print_sanitised else unsanitised_vulnerabilities with fileobj: - if number_of_vulnerabilities == 0: - fileobj.write('No vulnerabilities found.\n') - elif number_of_vulnerabilities == 1: - fileobj.write('%s vulnerability found:\n' % number_of_vulnerabilities) - else: - fileobj.write('%s vulnerabilities found:\n' % number_of_vulnerabilities) + fileobj.write(heading) - for i, vulnerability in enumerate(vulnerabilities, start=1): + for i, 
vulnerability in enumerate(vulnerabilities_to_print, start=1): fileobj.write('Vulnerability {}:\n{}\n\n'.format(i, vulnerability)) diff --git a/pyt/usage.py b/pyt/usage.py index d5b6efbe..7acc5be7 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -2,6 +2,8 @@ import os import sys +from .formatters import json, screen, text + default_blackbox_mapping_file = os.path.join( os.path.dirname(__file__), @@ -49,12 +51,6 @@ def _add_optional_group(parser): default=False, metavar='BASELINE_JSON_FILE', ) - optional_group.add_argument( - '-j', '--json', - help='Prints JSON instead of report.', - action='http://github.com/store_true', - default=False - ) optional_group.add_argument( '-t', '--trigger-word-file', help='Input file with a list of sources and sinks', @@ -115,6 +111,28 @@ def _add_optional_group(parser): default=True, dest='allow_local_imports' ) + optional_group.add_argument( + '-u', '--only-unsanitised', + help="Don't print sanitised vulnerabilities.", + action='http://github.com/store_true', + default=False, + ) + parser.set_defaults(formatter=text) + formatter_group = optional_group.add_mutually_exclusive_group() + formatter_group.add_argument( + '-j', '--json', + help='Prints JSON instead of report.', + action='http://github.com/store_const', + const=json, + dest='formatter', + ) + formatter_group.add_argument( + '-s', '--screen', + help='Prints colorful report.', + action='http://github.com/store_const', + const=screen, + dest='formatter', + ) def parse_args(args): diff --git a/tests/main_test.py b/tests/main_test.py index 1037d843..1e33ee24 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -8,72 +8,81 @@ class MainTest(BaseTestCase): @mock.patch('pyt.__main__.discover_files') @mock.patch('pyt.__main__.parse_args') @mock.patch('pyt.__main__.find_vulnerabilities') - @mock.patch('pyt.__main__.text') + @mock.patch('pyt.formatters.text') def test_text_output(self, mock_text, mock_find_vulnerabilities, mock_parse_args, 
mock_discover_files): mock_find_vulnerabilities.return_value = 'stuff' example_file = 'examples/vulnerable_code/inter_command_injection.py' output_file = 'mocked_outfile' + import pyt.formatters.text mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( project_root=None, baseline=None, - json=None, - output_file=output_file + formatter=pyt.formatters.text, + output_file=output_file, + only_unsanitised=False, ) with self.assertRaises(SystemExit): main(['parse_args is mocked']) assert mock_text.report.call_count == 1 mock_text.report.assert_called_with( mock_find_vulnerabilities.return_value, - mock_parse_args.return_value.output_file + mock_parse_args.return_value.output_file, + True, ) @mock.patch('pyt.__main__.discover_files') @mock.patch('pyt.__main__.parse_args') @mock.patch('pyt.__main__.find_vulnerabilities') - @mock.patch('pyt.__main__.text') + @mock.patch('pyt.formatters.text') def test_no_vulns_found(self, mock_text, mock_find_vulnerabilities, mock_parse_args, mock_discover_files): mock_find_vulnerabilities.return_value = [] example_file = 'examples/vulnerable_code/inter_command_injection.py' output_file = 'mocked_outfile' + import pyt.formatters.text mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( project_root=None, baseline=None, - json=None, - output_file=output_file + formatter=pyt.formatters.text, + output_file=output_file, + only_unsanitised=True, ) main(['parse_args is mocked']) # No SystemExit assert mock_text.report.call_count == 1 mock_text.report.assert_called_with( mock_find_vulnerabilities.return_value, - mock_parse_args.return_value.output_file + mock_parse_args.return_value.output_file, + False, ) @mock.patch('pyt.__main__.discover_files') @mock.patch('pyt.__main__.parse_args') @mock.patch('pyt.__main__.find_vulnerabilities') - @mock.patch('pyt.__main__.json') + @mock.patch('pyt.formatters.json') def test_json_output(self, mock_json, mock_find_vulnerabilities, 
mock_parse_args, mock_discover_files): mock_find_vulnerabilities.return_value = 'stuff' example_file = 'examples/vulnerable_code/inter_command_injection.py' output_file = 'mocked_outfile' + import pyt.formatters.json mock_discover_files.return_value = [example_file] mock_parse_args.return_value = mock.Mock( project_root=None, baseline=None, - json=True, - output_file=output_file + formatter=pyt.formatters.json, + output_file=output_file, + only_unsanitised=False, ) with self.assertRaises(SystemExit): main(['parse_args is mocked']) assert mock_json.report.call_count == 1 mock_json.report.assert_called_with( mock_find_vulnerabilities.return_value, - mock_parse_args.return_value.output_file + mock_parse_args.return_value.output_file, + True, ) diff --git a/tests/usage_test.py b/tests/usage_test.py index 363786bd..294004a4 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -26,10 +26,10 @@ def test_no_args(self): self.maxDiff = None EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-t TRIGGER_WORD_FILE] + [-b BASELINE_JSON_FILE] [-t TRIGGER_WORD_FILE] [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] - [--dont-prepend-root] [--no-local-imports] + [--dont-prepend-root] [--no-local-imports] [-u] [-j | -s] targets [targets ...] required arguments: @@ -45,7 +45,6 @@ def test_no_args(self): -b BASELINE_JSON_FILE, --baseline BASELINE_JSON_FILE Path of a baseline report to compare against (only JSON-formatted files are accepted) - -j, --json Prints JSON instead of report. -t TRIGGER_WORD_FILE, --trigger-word-file TRIGGER_WORD_FILE Input file with a list of sources and sinks -m BLACKBOX_MAPPING_FILE, --blackbox-mapping-file BLACKBOX_MAPPING_FILE @@ -62,7 +61,11 @@ def test_no_args(self): with app.* --no-local-imports If set, absolute imports must be relative to the project root. 
If not set, modules in the same - directory can be imported just by their names.\n""" + directory can be imported just by their names. + -u, --only-unsanitised + Don't print sanitised vulnerabilities. + -j, --json Prints JSON instead of report. + -s, --screen Prints colorful report.\n""" self.assertEqual(stdout.getvalue(), EXPECTED) @@ -72,10 +75,10 @@ def test_valid_args_but_no_targets(self): parse_args(['-j']) EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] - [-b BASELINE_JSON_FILE] [-j] [-t TRIGGER_WORD_FILE] + [-b BASELINE_JSON_FILE] [-t TRIGGER_WORD_FILE] [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] - [--dont-prepend-root] [--no-local-imports] + [--dont-prepend-root] [--no-local-imports] [-u] [-j | -s] targets [targets ...] python -m pyt: error: the following arguments are required: targets\n""" From bf4925901e1c7fafb9b5e329d69481baf6008c99 Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 5 Sep 2018 16:29:30 +0100 Subject: [PATCH 255/291] Add basic python logging to pyt with -v -vv -vvv Very basic python logging added to pyt. 
Very useful when you want to see: - which files are being processed - if your imports are not being inspected - which file crashed pyt --- pyt/__main__.py | 14 ++++++++++++++ pyt/cfg/stmt_visitor.py | 21 ++++++++++++++++++--- pyt/core/ast_helper.py | 8 ++++---- pyt/usage.py | 6 +++++- tests/usage_test.py | 5 +++-- 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 3e6a9911..d952cbe3 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -1,5 +1,6 @@ """The comand line module of PyT.""" +import logging import os import sys from collections import defaultdict @@ -26,6 +27,8 @@ is_function_without_leading_ ) +log = logging.getLogger(__name__) + def discover_files(targets, excluded_files, recursive=False): included_files = list() @@ -37,11 +40,13 @@ def discover_files(targets, excluded_files, recursive=False): if file.endswith('.py') and file not in excluded_list: fullpath = os.path.join(root, file) included_files.append(fullpath) + log.debug('Discovered file: %s', fullpath) if not recursive: break else: if target not in excluded_list: included_files.append(target) + log.debug('Discovered file: %s', target) return included_files @@ -60,6 +65,14 @@ def retrieve_nosec_lines( def main(command_line_args=sys.argv[1:]): # noqa: C901 args = parse_args(command_line_args) + logging_level = ( + logging.ERROR if not args.verbose else + logging.WARN if args.verbose == 1 else + logging.INFO if args.verbose == 2 else + logging.DEBUG + ) + logging.basicConfig(level=logging_level, format='[%(levelname)s] %(name)s: %(message)s') + files = discover_files( args.targets, args.excluded_paths, @@ -74,6 +87,7 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 cfg_list = list() for path in sorted(files): + log.info("Processing %s", path) if not args.ignore_nosec: nosec_lines[path] = retrieve_nosec_lines(path) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index ce4d198b..95913211 100644 --- a/pyt/cfg/stmt_visitor.py 
+++ b/pyt/cfg/stmt_visitor.py @@ -1,6 +1,8 @@ import ast import itertools +import logging import os.path +from pkgutil import iter_modules from .alias_helper import ( as_alias_handler, @@ -52,6 +54,9 @@ remove_breaks ) +log = logging.getLogger(__name__) +uninspectable_modules = {module.name for module in iter_modules()} # Don't warn about failing to import these + class StmtVisitor(ast.NodeVisitor): def __init__(self, allow_local_directory_imports=True): @@ -429,9 +434,12 @@ def visit_Assign(self, node): else: label = LabelVisitor() label.visit(node) - print('Assignment not properly handled.', - 'Could result in not finding a vulnerability.', - 'Assignment:', label.result) + log.warn( + 'Assignment not properly handled in %s. Could result in not finding a vulnerability. ' + 'Assignment: %s', + getattr(self, 'filenames', ['?'])[0], + label.result, + ) return self.append_node(AssignmentNode( label.result, label.result, @@ -1022,6 +1030,10 @@ def visit_Import(self, node): name.asname, retrieve_import_alias_mapping(node.names) ) + for alias in node.names: + if alias.name not in uninspectable_modules: + log.warn("Cannot inspect module %s", alias.name) + uninspectable_modules.add(alias.name) # Don't repeatedly warn about this return IgnoredNode() def visit_ImportFrom(self, node): @@ -1061,4 +1073,7 @@ def visit_ImportFrom(self, node): retrieve_import_alias_mapping(node.names), from_from=True ) + if node.module not in uninspectable_modules: + log.warn("Cannot inspect module %s", node.module) + uninspectable_modules.add(node.module) return IgnoredNode() diff --git a/pyt/core/ast_helper.py b/pyt/core/ast_helper.py index 4ca1ca69..e4ccbca2 100644 --- a/pyt/core/ast_helper.py +++ b/pyt/core/ast_helper.py @@ -2,13 +2,14 @@ Useful when working with the ast module.""" import ast +import logging import os import subprocess from functools import lru_cache from .transformer import PytTransformer - +log = logging.getLogger(__name__) BLACK_LISTED_CALL_NAMES = ['self'] recursive = 
False @@ -16,11 +17,10 @@ def _convert_to_3(path): # pragma: no cover """Convert python 2 file to python 3.""" try: - print('##### Trying to convert file to Python 3. #####') + log.warn('##### Trying to convert %s to Python 3. #####', path) subprocess.call(['2to3', '-w', path]) except subprocess.SubprocessError: - print('Check if 2to3 is installed. ' - 'https://docs.python.org/2/library/2to3.html') + log.exception('Check if 2to3 is installed. https://docs.python.org/2/library/2to3.html') exit(1) diff --git a/pyt/usage.py b/pyt/usage.py index 7acc5be7..9de776cf 100644 --- a/pyt/usage.py +++ b/pyt/usage.py @@ -30,7 +30,11 @@ def _add_required_group(parser): def _add_optional_group(parser): optional_group = parser.add_argument_group('optional arguments') - + optional_group.add_argument( + '-v', '--verbose', + action='http://github.com/count', + help='Increase logging verbosity. Can repeated e.g. -vvv', + ) optional_group.add_argument( '-a', '--adaptor', help='Choose a web framework adaptor: ' diff --git a/tests/usage_test.py b/tests/usage_test.py index 294004a4..cf79f6c5 100644 --- a/tests/usage_test.py +++ b/tests/usage_test.py @@ -25,7 +25,7 @@ def test_no_args(self): self.maxDiff = None - EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] + EXPECTED = """usage: python -m pyt [-h] [-v] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-t TRIGGER_WORD_FILE] [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] @@ -36,6 +36,7 @@ def test_no_args(self): targets source file(s) or directory(s) to be scanned optional arguments: + -v, --verbose Increase logging verbosity. Can repeated e.g. 
-vvv -a ADAPTOR, --adaptor ADAPTOR Choose a web framework adaptor: Flask(Default), Django, Every or Pylons @@ -74,7 +75,7 @@ def test_valid_args_but_no_targets(self): with capture_sys_output() as (_, stderr): parse_args(['-j']) - EXPECTED = """usage: python -m pyt [-h] [-a ADAPTOR] [-pr PROJECT_ROOT] + EXPECTED = """usage: python -m pyt [-h] [-v] [-a ADAPTOR] [-pr PROJECT_ROOT] [-b BASELINE_JSON_FILE] [-t TRIGGER_WORD_FILE] [-m BLACKBOX_MAPPING_FILE] [-i] [-o OUTPUT_FILE] [--ignore-nosec] [-r] [-x EXCLUDED_PATHS] From c223e13a45032906ed96503353abeb2051e89d46 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 7 Sep 2018 11:13:26 +0100 Subject: [PATCH 256/291] More logging --- pyt/__main__.py | 2 ++ pyt/web_frameworks/framework_adaptor.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/pyt/__main__.py b/pyt/__main__.py index d952cbe3..8646bceb 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -125,7 +125,9 @@ def main(command_line_args=sys.argv[1:]): # noqa: C901 ) initialize_constraint_table(cfg_list) + log.info("Analysing") analyse(cfg_list) + log.info("Finding vulnerabilities") vulnerabilities = find_vulnerabilities( cfg_list, args.blackbox_mapping_file, diff --git a/pyt/web_frameworks/framework_adaptor.py b/pyt/web_frameworks/framework_adaptor.py index 2bc4d7ee..96d2a32f 100644 --- a/pyt/web_frameworks/framework_adaptor.py +++ b/pyt/web_frameworks/framework_adaptor.py @@ -1,6 +1,7 @@ """A generic framework adaptor that leaves route criteria to the caller.""" import ast +import logging from ..cfg import make_cfg from ..core.ast_helper import Arguments @@ -10,6 +11,8 @@ TaintedNode ) +log = logging.getLogger(__name__) + class FrameworkAdaptor(): """An engine that uses the template pattern to find all @@ -31,6 +34,7 @@ def __init__( def get_func_cfg_with_tainted_args(self, definition): """Build a function cfg and return it, with all arguments tainted.""" + log.debug("Getting CFG for %s", definition.name) func_cfg = make_cfg( definition.node, 
self.project_modules, From b2daf8b08cd72d355b266022e199dc0111325428 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 7 Sep 2018 11:08:34 +0100 Subject: [PATCH 257/291] Recursive function calls shouldn't raise RecursionError Store a stack of definitions. If you revisit a function, treat it as a blackbox. The non-recursive return values should still propagate. --- pyt/cfg/expr_visitor.py | 9 +++++++++ tests/cfg/cfg_test.py | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index 6623d717..57537875 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -1,4 +1,5 @@ import ast +import logging from .alias_helper import handle_aliases_in_calls from ..core.ast_helper import ( @@ -30,6 +31,8 @@ from .stmt_visitor import StmtVisitor from .stmt_visitor_helper import CALL_IDENTIFIER +log = logging.getLogger(__name__) + class ExprVisitor(StmtVisitor): def __init__( @@ -52,6 +55,7 @@ def __init__( self.undecided = False self.function_names = list() self.function_return_stack = list() + self.function_definition_stack = list() # used to avoid recursion self.module_definitions_stack = list() self.prev_nodes_to_avoid = list() self.last_control_flow_nodes = list() @@ -543,6 +547,7 @@ def process_function(self, call_node, definition): first_node ) self.function_return_stack.pop() + self.function_definition_stack.pop() return self.nodes[-1] @@ -560,11 +565,15 @@ def visit_Call(self, node): last_attribute = _id.rpartition('.')[-1] if definition: + if definition in self.function_definition_stack: + log.debug("Recursion encountered in function %s", _id) + return self.add_blackbox_or_builtin_call(node, blackbox=True) if isinstance(definition.node, ast.ClassDef): self.add_blackbox_or_builtin_call(node, blackbox=False) elif isinstance(definition.node, ast.FunctionDef): self.undecided = False self.function_return_stack.append(_id) + self.function_definition_stack.append(definition) return self.process_function(node, 
definition) else: raise Exception('Definition was neither FunctionDef or ' + diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index 3c215983..ef478396 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -3,6 +3,7 @@ from .cfg_base_test_case import CFGBaseTestCase from pyt.core.node_types import ( + BBorBInode, EntryOrExitNode, Node ) @@ -1389,6 +1390,13 @@ def test_call_on_call(self): path = 'examples/example_inputs/call_on_call.py' self.cfg_create_from_file(path) + def test_recursive_function(self): + path = 'examples/example_inputs/recursive.py' + self.cfg_create_from_file(path) + recursive_call = self.cfg.nodes[7] + assert recursive_call.label == '~call_3 = ret_rec(wat)' + assert isinstance(recursive_call, BBorBInode) # Not RestoreNode + class CFGCallWithAttributeTest(CFGBaseTestCase): def setUp(self): From 093f506dde3d7c8ef0f87ed1de6b86d837865de1 Mon Sep 17 00:00:00 2001 From: bcaller Date: Fri, 7 Sep 2018 11:29:49 +0100 Subject: [PATCH 258/291] Add test for vulnerabilities in recursive functions recur_no_propagation_false_positive is actually safe, but it would be difficult to reliably determine this, so we'll have to make do with the false positive at least for now (as recursive functions can call other recursive functions). 
--- examples/vulnerable_code/recursive.py | 32 +++++++++++++++++++ tests/main_test.py | 4 +-- tests/vulnerabilities/vulnerabilities_test.py | 5 +++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 examples/vulnerable_code/recursive.py diff --git a/examples/vulnerable_code/recursive.py b/examples/vulnerable_code/recursive.py new file mode 100644 index 00000000..d2cd6163 --- /dev/null +++ b/examples/vulnerable_code/recursive.py @@ -0,0 +1,32 @@ +from flask import Flask, request + +app = Flask(__name__) + + +def recur_without_any_propagation(x): + if len(x) < 20: + return recur_without_any_propagation("a" * 24) + return "Done" + + +def recur_no_propagation_false_positive(x): + if len(x) < 20: + return recur_no_propagation_false_positive(x + "!") + return "Done" + + +def recur_with_propagation(x): + if len(x) < 20: + return recur_with_propagation(x + "!") + return x + + +@app.route('/recursive') +def route(): + param = request.args.get('param', 'not set') + repeated_completely_untainted = recur_without_any_propagation(param) + app.db.execute(repeated_completely_untainted) + repeated_untainted = recur_no_propagation_false_positive(param) + app.db.execute(repeated_untainted) + repeated_tainted = recur_with_propagation(param) + app.db.execute(repeated_tainted) diff --git a/tests/main_test.py b/tests/main_test.py index 1e33ee24..bc985629 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -108,11 +108,11 @@ def test_targets_with_recursive(self): excluded_files = "" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 31) + self.assertEqual(len(included_files), 32) def test_targets_with_recursive_and_excluded(self): targets = ["examples/vulnerable_code/"] excluded_files = "inter_command_injection.py" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 30) + self.assertEqual(len(included_files), 31) diff --git 
a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 52fbfb2c..8f5e70f1 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -465,6 +465,11 @@ def assert_vulnerable(fixture): assert_vulnerable('result = repr(str("%s" % TAINT.lower().upper()))') assert_vulnerable('result = repr(str("{}".format(TAINT.lower())))') + def test_recursion(self): + # Really this file only has one vulnerability, but for now it's safer to keep the false positive. + vulnerabilities = self.run_analysis('examples/vulnerable_code/recursive.py') + self.assert_length(vulnerabilities, expected_length=2) + class EngineDjangoTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): From 5d7a94b417b97b7fd56b9b15cc5e12d685dd4d84 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 11 Sep 2018 18:23:33 -0700 Subject: [PATCH 259/291] [version] Bump to 0.40 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9809fcdd..17f2c8f5 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.39' +VERSION = '0.40' setup( From 2e4f8c9c884f8b072f7ae4319047c3aa5bd3eed6 Mon Sep 17 00:00:00 2001 From: bcaller Date: Mon, 29 Oct 2018 15:39:11 +0000 Subject: [PATCH 260/291] Better handling of IfExp (ternary) Reduces false positives. As an example: result = "a" if TAINT else "c" In AST, the assignment value is `IfExp(test=TAINT, body="a", orelse="c")`. Even though `TAINT` is inside the assignment of `result`, it can't actually taint `result` as it is part of the boolean test expression. Previously, `result` would have been tainted, which was a false positive. We don't want to completely ignore the test though in case it contains a sink function. 
Therefore, if the test contains expressions we transform it as so: result = "a" if b(c) + 2 else "d" to the multi line: __if_exp_0 = b(c) + 2 result = "a" if __if_exp_0 else "d" This way if `b` is a sink and `c` is tainted we see a vulnerability, but even if `c` is tainted we don't taint `result`. --- examples/example_inputs/ternary.py | 9 +++ pyt/core/transformer.py | 77 ++++++++++++++++++- pyt/helper_visitors/label_visitor.py | 9 +++ .../right_hand_side_visitor.py | 5 ++ tests/cfg/cfg_test.py | 33 ++++++++ tests/core/transformer_test.py | 16 ++++ tests/helper_visitors/label_visitor_test.py | 4 + 7 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 examples/example_inputs/ternary.py diff --git a/examples/example_inputs/ternary.py b/examples/example_inputs/ternary.py new file mode 100644 index 00000000..fe0914e4 --- /dev/null +++ b/examples/example_inputs/ternary.py @@ -0,0 +1,9 @@ +result = ( + "abc" + if t.u == v.w else + "def" + if x else + y # This is the only RHS variable which taints result + if func(z if 1 + 1 == 2 else z) else + "ghi" +) diff --git a/pyt/core/transformer.py b/pyt/core/transformer.py index ccdd82c4..5084bae6 100644 --- a/pyt/core/transformer.py +++ b/pyt/core/transformer.py @@ -64,5 +64,80 @@ def visit_Return(self, node): return self.visit_chain(node) -class PytTransformer(AsyncTransformer, ChainedFunctionTransformer, ast.NodeTransformer): +class IfExpRewriter(ast.NodeTransformer): + """Splits IfExp ternary expressions containing complex tests into multiple statements + + Will change + + a if b(c) else d + + into + + a if __if_exp_0 else d + + with Assign nodes in assignments [__if_exp_0 = b(c)] + """ + + def __init__(self, starting_index=0): + self._temporary_variable_index = starting_index + self.assignments = [] + super().__init__() + + def visit_IfExp(self, node): + if isinstance(node.test, (ast.Name, ast.Attribute)): + return self.generic_visit(node) + else: + temp_var_id = 
'__if_exp_{}'.format(self._temporary_variable_index) + self._temporary_variable_index += 1 + assignment_of_test = ast.Assign( + targets=[ast.Name(id=temp_var_id, ctx=ast.Store())], + value=self.visit(node.test), + ) + ast.copy_location(assignment_of_test, node) + self.assignments.append(assignment_of_test) + transformed_if_exp = ast.IfExp( + test=ast.Name(id=temp_var_id, ctx=ast.Load()), + body=self.visit(node.body), + orelse=self.visit(node.orelse), + ) + ast.copy_location(transformed_if_exp, node) + return transformed_if_exp + + def visit_FunctionDef(self, node): + return node + + +class IfExpTransformer: + """Goes through module and function bodies, adding extra Assign nodes due to IfExp expressions.""" + + def visit_body(self, nodes): + new_nodes = [] + count = 0 + for node in nodes: + rewriter = IfExpRewriter(count) + possibly_transformed_node = rewriter.visit(node) + if rewriter.assignments: + new_nodes.extend(rewriter.assignments) + count += len(rewriter.assignments) + new_nodes.append(possibly_transformed_node) + return new_nodes + + def visit_FunctionDef(self, node): + transformed = ast.FunctionDef( + name=node.name, + args=node.args, + body=self.visit_body(node.body), + decorator_list=node.decorator_list, + returns=node.returns + ) + ast.copy_location(transformed, node) + return self.generic_visit(transformed) + + def visit_Module(self, node): + transformed = ast.Module(self.visit_body(node.body)) + ast.copy_location(transformed, node) + return self.generic_visit(transformed) + + +class PytTransformer(AsyncTransformer, IfExpTransformer, ChainedFunctionTransformer, ast.NodeTransformer): pass diff --git a/pyt/helper_visitors/label_visitor.py b/pyt/helper_visitors/label_visitor.py index 3be85ba4..e8e2d74a 100644 --- a/pyt/helper_visitors/label_visitor.py +++ b/pyt/helper_visitors/label_visitor.py @@ -324,3 +324,12 @@ def visit_FormattedValue(self, node): def visit_Starred(self, node): self.result += '*' self.visit(node.value) + + def visit_IfExp(self, node): 
+ self.result += '(' + self.visit(node.test) + self.result += ') ? (' + self.visit(node.body) + self.result += ') : (' + self.visit(node.orelse) + self.result += ')' diff --git a/pyt/helper_visitors/right_hand_side_visitor.py b/pyt/helper_visitors/right_hand_side_visitor.py index 629a94bb..02c96618 100644 --- a/pyt/helper_visitors/right_hand_side_visitor.py +++ b/pyt/helper_visitors/right_hand_side_visitor.py @@ -22,6 +22,11 @@ def visit_Call(self, node): for keyword in node.keywords: self.visit(keyword) + def visit_IfExp(self, node): + # The test doesn't taint the assignment + self.visit(node.body) + self.visit(node.orelse) + @classmethod def result_for_node(cls, node): visitor = cls() diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index ef478396..a4c24ba5 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -580,6 +580,39 @@ def test_if_not(self): (_exit, _if) ]) + def test_ternary_ifexp(self): + self.cfg_create_from_file('examples/example_inputs/ternary.py') + + # entry = 0 + tmp_if_1 = 1 + # tmp_if_inner = 2 + call = 3 + # tmp_if_call = 4 + actual_if_exp = 5 + exit = 6 + + self.assert_length(self.cfg.nodes, expected_length=exit + 1) + self.assertInCfg([ + (i + 1, i) for i in range(exit) + ]) + + self.assertCountEqual( + self.cfg.nodes[actual_if_exp].right_hand_side_variables, + ['y'], + "The variables in the test expressions shouldn't appear as RHS variables" + ) + + self.assertCountEqual( + self.cfg.nodes[tmp_if_1].right_hand_side_variables, + ['t', 'v'], + ) + + self.assertIn( + 'ret_func(', + self.cfg.nodes[call].label, + "Function calls inside the test expressions should still appear in the CFG", + ) + class CFGWhileTest(CFGBaseTestCase): diff --git a/tests/core/transformer_test.py b/tests/core/transformer_test.py index 2676c70d..95213706 100644 --- a/tests/core/transformer_test.py +++ b/tests/core/transformer_test.py @@ -52,3 +52,19 @@ def test_chained_function(self): transformed = PytTransformer().visit(chained_tree) 
self.assertEqual(ast.dump(transformed), ast.dump(separated_tree)) + + def test_if_exp(self): + complex_if_exp_tree = ast.parse("\n".join([ + "def a():", + " b = c if d.e(f) else g if h else i if j.k(l) else m", + ])) + + separated_tree = ast.parse("\n".join([ + "def a():", + " __if_exp_0 = d.e(f)", + " __if_exp_1 = j.k(l)", + " b = c if __if_exp_0 else g if h else i if __if_exp_1 else m", + ])) + + transformed = PytTransformer().visit(complex_if_exp_tree) + self.assertEqual(ast.dump(transformed), ast.dump(separated_tree)) diff --git a/tests/helper_visitors/label_visitor_test.py b/tests/helper_visitors/label_visitor_test.py index 2a7f0857..d4b77794 100644 --- a/tests/helper_visitors/label_visitor_test.py +++ b/tests/helper_visitors/label_visitor_test.py @@ -83,3 +83,7 @@ def test_joined_str_with_format_spec(self): def test_starred(self): label = self.perform_labeling_on_expression('[a, *b] = *c, d') self.assertEqual(label.result, '[a, *b] = (*c, d)') + + def test_if_exp(self): + label = self.perform_labeling_on_expression('a = b if c else d') + self.assertEqual(label.result, 'a = (c) ? (b) : (d)') From 23c186fb27ec2db7aba627c08727cea71ec9f3f8 Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 31 Oct 2018 17:57:24 +0000 Subject: [PATCH 261/291] Simplify trigger file for sink argument propagation This changes the schema of the trigger file. Previously there were too many options and it was confusing. My fault, sorry. This meant that `db.execute(query, **TAINT)` was marked as a vulnerability whereas `db.execute(text=query, **TAINT)` wasn't. Neither are vulnerabilities, so this gave a FALSE POSITIVE. Now we have `arg_dict` which is a dictionary of keyword to argument position. E.g. for `def f(a, b, *, c)` we can specify the arg_dict as: ``` {"a": 0, "b": 1, "c": null} ``` if we want them all to propagate or not propagate depending on the `unlisted_args_propagate` value. 
This way, we can more easily define db.execute as: ``` "execute(": { "unlisted_args_propagate": false, "arg_dict": { "text": 0 } }, ``` --- .../trigger_definitions_parser.py | 27 +++++++++++-------- pyt/vulnerabilities/vulnerabilities.py | 20 +++++++------- .../test_positions.pyt | 21 +++++---------- tests/vulnerabilities/vulnerabilities_test.py | 1 + 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/pyt/vulnerabilities/trigger_definitions_parser.py b/pyt/vulnerabilities/trigger_definitions_parser.py index ab737928..4bfd3a15 100644 --- a/pyt/vulnerabilities/trigger_definitions_parser.py +++ b/pyt/vulnerabilities/trigger_definitions_parser.py @@ -16,33 +16,38 @@ class Sink: def __init__( self, trigger, *, - unlisted_args_propagate=True, unlisted_kwargs_propagate=True, - arg_list=None, kwarg_list=None, - sanitisers=None + unlisted_args_propagate=True, + arg_dict=None, + sanitisers=None, ): self._trigger = trigger self.sanitisers = sanitisers or [] self.arg_list_propagates = not unlisted_args_propagate - self.kwarg_list_propagates = not unlisted_kwargs_propagate if trigger[-1] != '(': - if self.arg_list_propagates or self.kwarg_list_propagates or arg_list or kwarg_list: + if self.arg_list_propagates or arg_dict: raise ValueError("Propagation options specified, but trigger word isn't a function call") - self.arg_list = set(arg_list or ()) - self.kwarg_list = set(kwarg_list or ()) + arg_dict = {} if arg_dict is None else arg_dict + self.arg_position_to_kwarg = { + position: name for name, position in arg_dict.items() if position is not None + } + self.kwarg_list = set(arg_dict.keys()) def arg_propagates(self, index): - in_list = index in self.arg_list - return self.arg_list_propagates == in_list + kwarg = self.get_kwarg_from_position(index) + return self.kwarg_propagates(kwarg) def kwarg_propagates(self, keyword): in_list = keyword in self.kwarg_list - return self.kwarg_list_propagates == in_list + return self.arg_list_propagates == in_list + + def 
get_kwarg_from_position(self, index): + return self.arg_position_to_kwarg.get(index) @property def all_arguments_propagate_taint(self): - if self.arg_list or self.kwarg_list: + if self.kwarg_list: return False return True diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 47986ed8..0daca2cd 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -243,29 +243,27 @@ def get_sink_args(cfg_node): def get_sink_args_which_propagate(sink, ast_node): sink_args_with_positions = CallVisitor.get_call_visit_results(sink.trigger.call, ast_node) sink_args = [] + kwargs_present = set() for i, vars in enumerate(sink_args_with_positions.args): - if sink.trigger.arg_propagates(i): + kwarg = sink.trigger.get_kwarg_from_position(i) + if kwarg: + kwargs_present.add(kwarg) + if sink.trigger.kwarg_propagates(kwarg): sink_args.extend(vars) - if ( - # Either any unspecified arg propagates - not sink.trigger.arg_list_propagates or - # or there are some propagating args which weren't passed positionally - any(1 for position in sink.trigger.arg_list if position >= len(sink_args_with_positions.args)) - ): - sink_args.extend(sink_args_with_positions.unknown_args) - for keyword, vars in sink_args_with_positions.kwargs.items(): + kwargs_present.add(keyword) if sink.trigger.kwarg_propagates(keyword): sink_args.extend(vars) if ( # Either any unspecified kwarg propagates - not sink.trigger.kwarg_list_propagates or + not sink.trigger.arg_list_propagates or # or there are some propagating kwargs which have not been passed by keyword - sink.trigger.kwarg_list - set(sink_args_with_positions.kwargs.keys()) + sink.trigger.kwarg_list - kwargs_present ): + sink_args.extend(sink_args_with_positions.unknown_args) sink_args.extend(sink_args_with_positions.unknown_kwargs) return sink_args diff --git a/pyt/vulnerability_definitions/test_positions.pyt b/pyt/vulnerability_definitions/test_positions.pyt index 
48e276fe..ddbc20a8 100644 --- a/pyt/vulnerability_definitions/test_positions.pyt +++ b/pyt/vulnerability_definitions/test_positions.pyt @@ -7,22 +7,15 @@ "normal(": {}, "execute(": { "unlisted_args_propagate": false, - "arg_list": [ - 0 - ], - "unlisted_kwargs_propagate": false, - "kwarg_list": [ - "text" - ] + "arg_dict": { + "text": 0 + } }, "run(": { - "kwarg_list": [ - "non_propagating" - ], - "arg_list": [ - 2, - 3 - ] + "arg_dict": { + "non_propagating": 2, + "something_else": 3 + } } } } diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 8f5e70f1..2846ab55 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -577,6 +577,7 @@ def check(fixture, vulnerable): 'execute(x, name=TAINT)', 'execute(x, *TAINT)', 'execute(text=x, **TAINT)', + 'execute(x, **TAINT)', 'dont_run(TAINT)', ) vuln_fixtures = ( From 7847d01a3ef28322fcb1de18d67c30586d290473 Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 31 Oct 2018 16:03:07 +0000 Subject: [PATCH 262/291] list is tainted by calling list.append(TAINT) Taint is propagated by: ``` list += [TAINT] list = list + TAINT ``` but with lists we often use a function to mutate the list: ``` list = [] list.append(TAINT) list.insert(0, TAINT) list.extend(TAINT) ``` Previously this didn't taint `list` so we had FALSE NEGATIVES. Now `list.append(TAINT)` is treated like augmented assignment, so list will be tainted. `list += list.append(TAINT)` Of course this wouldn't work as real code since `append` returns `None` but it is how you can think about this function which mutates `list`. The same goes for `set.add()`, `list.extend()`, `list.insert()`, `dict.update()`, although we aren't actually doing type checking, just looking at the name of the method. 
--- examples/vulnerable_code/list_append.py | 13 +++++++++++++ pyt/cfg/expr_visitor.py | 19 +++++++++++++++++++ pyt/cfg/expr_visitor_helper.py | 7 +++++++ tests/main_test.py | 4 ++-- tests/vulnerabilities/vulnerabilities_test.py | 18 +++++++++++++++--- 5 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 examples/vulnerable_code/list_append.py diff --git a/examples/vulnerable_code/list_append.py b/examples/vulnerable_code/list_append.py new file mode 100644 index 00000000..9c4bbd63 --- /dev/null +++ b/examples/vulnerable_code/list_append.py @@ -0,0 +1,13 @@ +import os + +from flask import request + + +def func(): + TAINT = request.args.get("TAINT") + + cmd = [] + cmd.append("echo") + cmd.append(TAINT) + + os.system(" ".join(cmd)) diff --git a/pyt/cfg/expr_visitor.py b/pyt/cfg/expr_visitor.py index 57537875..d49be28e 100644 --- a/pyt/cfg/expr_visitor.py +++ b/pyt/cfg/expr_visitor.py @@ -21,6 +21,7 @@ ) from .expr_visitor_helper import ( BUILTINS, + MUTATORS, return_connection_handler, SavedVariable ) @@ -59,6 +60,7 @@ def __init__( self.module_definitions_stack = list() self.prev_nodes_to_avoid = list() self.last_control_flow_nodes = list() + self._within_mutating_call = False # Are we already in a module? 
if module_definitions: @@ -578,6 +580,23 @@ def visit_Call(self, node): else: raise Exception('Definition was neither FunctionDef or ' + 'ClassDef, cannot add the function ') + elif ( + not self._within_mutating_call and + last_attribute in MUTATORS + and isinstance(node.func, ast.Attribute) + ): + # Change list.append(x) ---> list += list.append(x) + # This does in fact propagate as we don't know that append returns None + fake_aug_assign = ast.AugAssign( + target=node.func.value, + op=ast.Add, + value=node, + ) + ast.copy_location(fake_aug_assign, node) + self._within_mutating_call = True # Don't do this recursively + result = self.visit(fake_aug_assign) + self._within_mutating_call = False + return result elif last_attribute not in BUILTINS: # Mark the call as a blackbox because we don't have the definition return self.add_blackbox_or_builtin_call(node, blackbox=True) diff --git a/pyt/cfg/expr_visitor_helper.py b/pyt/cfg/expr_visitor_helper.py index 9667f7c1..a0df3c90 100644 --- a/pyt/cfg/expr_visitor_helper.py +++ b/pyt/cfg/expr_visitor_helper.py @@ -31,6 +31,13 @@ 'flash', 'jsonify' ) +MUTATORS = ( # list.append(x) taints list if x is tainted + 'add', + 'append', + 'extend', + 'insert', + 'update', +) def return_connection_handler(nodes, exit_node): diff --git a/tests/main_test.py b/tests/main_test.py index bc985629..561d8bd1 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -108,11 +108,11 @@ def test_targets_with_recursive(self): excluded_files = "" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 32) + self.assertEqual(len(included_files), 33) def test_targets_with_recursive_and_excluded(self): targets = ["examples/vulnerable_code/"] excluded_files = "inter_command_injection.py" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 31) + self.assertEqual(len(included_files), 32) diff --git a/tests/vulnerabilities/vulnerabilities_test.py 
b/tests/vulnerabilities/vulnerabilities_test.py index 8f5e70f1..a621f4d4 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -110,12 +110,17 @@ def test_build_sanitiser_node_dict(self): self.assertEqual(sanitiser_dict['escape'][0], cfg.nodes[3]) - def run_analysis(self, path=None): + def run_analysis( + self, + path=None, + adaptor_function=is_flask_route_function, + trigger_file=default_trigger_word_file, + ): if path: self.cfg_create_from_file(path) cfg_list = [self.cfg] - FrameworkAdaptor(cfg_list, [], [], is_flask_route_function) + FrameworkAdaptor(cfg_list, [], [], adaptor_function) initialize_constraint_table(cfg_list) analyse(cfg_list) @@ -123,7 +128,7 @@ def run_analysis(self, path=None): return find_vulnerabilities( cfg_list, default_blackbox_mapping_file, - default_trigger_word_file + trigger_file, ) def test_find_vulnerabilities_assign_other_var(self): @@ -470,6 +475,13 @@ def test_recursion(self): vulnerabilities = self.run_analysis('examples/vulnerable_code/recursive.py') self.assert_length(vulnerabilities, expected_length=2) + def test_list_append_taints_list(self): + vulnerabilities = self.run_analysis( + 'examples/vulnerable_code/list_append.py', + adaptor_function=is_function, + ) + self.assert_length(vulnerabilities, expected_length=1) + class EngineDjangoTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): From 3b885ec73bdfefd80b5ca2860033d2f8a9298e81 Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 1 Nov 2018 15:33:18 +0000 Subject: [PATCH 263/291] [version] Bump to 0.41 --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ setup.py | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aefd0ec..6609a582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,38 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: [#xxxx]: https://github.com/python-security/pyt/pull/xxxx [@xxxx]: https://github.com/xxxx --> +# 0.41 +##### November 1st, 2018 + +#### :boom: Breaking Changes +* Changed trigger file format when specifying specific tainted args ([#182]) + +#### :tada: New Features +* Function calls such as `list.append` and `dict.update` now propagate taint to the list or dict ([#181]) + +#### :bug: Bugfixes +* IfExp (or ternary) expression handling improved ([#179]) + +# 0.40 +##### September 11th, 2018 + +#### :mega: Release Highlights +* Logging changes. Logging verbosity can be changed with `-v` to `-vvv` ([#172]) + +#### :boom: Breaking Changes +* Removed `--trim` option ([#169]) + +#### :tada: New Features +* Added `--only-unsanitised` flag to not print sanitised vulnerabilities ([#172]) + +#### :bug: Bugfixes +* Recursive functions don't cause `RecursionError` ([#173]) +* Handling of chained functions improved ([#171]) + +# 0.39 +##### August 21st, 2018 + +... # 0.38 ##### August 2nd, 2018 diff --git a/setup.py b/setup.py index 17f2c8f5..a597a9a7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.40' +VERSION = '0.41' setup( From 974e84266d7a282383b06da8cc73233fdeb287b0 Mon Sep 17 00:00:00 2001 From: bcaller Date: Thu, 1 Nov 2018 15:57:50 +0000 Subject: [PATCH 264/291] [version] Bump 0.42 No actual code changes here. I accidentally uploaded some vim undo files inside the package added to pypi for 0.41. --- CHANGELOG.md | 2 +- MANIFEST.in | 3 ++- setup.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6609a582..928f88cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: [#xxxx]: https://github.com/python-security/pyt/pull/xxxx [@xxxx]: https://github.com/xxxx --> -# 0.41 +# 0.42 ##### November 1st, 2018 #### :boom: Breaking Changes diff --git a/MANIFEST.in b/MANIFEST.in index d1c8d23a..d859a883 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -graft pyt/vulnerability_definitions +include pyt/vulnerability_definitions/*.pyt +include pyt/vulnerability_definitions/*.json diff --git a/setup.py b/setup.py index a597a9a7..102837cf 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -VERSION = '0.41' +VERSION = '0.42' setup( From e10f850bd879b8f5c1627919410e369164423583 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 8 Nov 2018 17:08:24 -0800 Subject: [PATCH 265/291] Add link to AMF Ranked #1 on [Givewell](https://www.givewell.org/charities/top-charities) --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index ad79c794..80ac92a5 100644 --- a/README.rst +++ b/README.rst @@ -16,6 +16,9 @@ .. image:: https://img.shields.io/badge/python-v3.6-blue.svg :target: https://pypi.org/project/python-taint/ +.. image:: https://img.shields.io/badge/Donate-Charity-orange.svg + :target: https://www.againstmalaria.com/donation.aspx + Python Taint ============ From ce56a20731de1b6245fd76555bdab00fc9fa07fb Mon Sep 17 00:00:00 2001 From: KevinHock Date: Thu, 8 Nov 2018 17:52:11 -0800 Subject: [PATCH 266/291] Fix link for PRs Welcome Badge --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 80ac92a5..025a3705 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ :target: https://badge.fury.io/py/python-taint .. image:: https://img.shields.io/badge/PRs-welcome-ff69b4.svg - :target: https://github.com/python-security/pyt/issues?q=is%3Aopen+is%3Aissue+label%3Agood-first-issue + :target: https://github.com/python-security/pyt/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+ .. 
image:: https://img.shields.io/badge/python-v3.6-blue.svg :target: https://pypi.org/project/python-taint/ From e704c21116bd9248312a54a5aeed77634fb22345 Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Mon, 19 Nov 2018 16:05:47 -0800 Subject: [PATCH 267/291] 133: Visit functions in while test --- .../example_inputs/while_func_comparator.py | 6 ++++ pyt/cfg/stmt_visitor.py | 9 ++++-- tests/cfg/cfg_test.py | 32 +++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 examples/example_inputs/while_func_comparator.py diff --git a/examples/example_inputs/while_func_comparator.py b/examples/example_inputs/while_func_comparator.py new file mode 100644 index 00000000..6aafc2b6 --- /dev/null +++ b/examples/example_inputs/while_func_comparator.py @@ -0,0 +1,6 @@ +def foo(): + return 6 + +while x < foo(): + print(x) + x += 1 diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 95913211..ab45707a 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -565,13 +565,18 @@ def visit_While(self, node): label_visitor = LabelVisitor() label_visitor.visit(node.test) - test = self.append_node(Node( + while_node = self.append_node(Node( 'while ' + label_visitor.result + ':', node, path=self.filenames[-1] )) - return self.loop_node_skeleton(test, node) + for comp in node.test.comparators: + if isinstance(comp, ast.Call) and get_call_names_as_string(comp.func) in self.function_names: + last_node = self.visit(comp) + last_node.connect(while_node) + + return self.loop_node_skeleton(while_node, node) def add_blackbox_or_builtin_call(self, node, blackbox): # noqa: C901 """Processes a blackbox or builtin function when it is called. 
diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index a4c24ba5..cfdc056a 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -684,6 +684,38 @@ def test_while_line_numbers(self): self.assertLineNumber(else_body_2, 6) self.assertLineNumber(next_stmt, 7) + def test_while_func_iterator(self): + self.cfg_create_from_file('examples/example_inputs/while_func_comparator.py') + + self.assert_length(self.cfg.nodes, expected_length=9) + + entry = 0 + test = 1 + entry_foo = 2 + ret_foo = 3 + exit_foo = 4 + call_foo = 5 + _print = 6 + body_1 = 7 + _exit = 8 + + self.assertEqual(self.cfg.nodes[test].label, 'while x < foo():') + + self.assertInCfg([ + (test, entry), + (entry_foo, test), + (_print, test), + (_exit, test), + (body_1, _print), + + (test, body_1), + (test, call_foo), + (ret_foo, entry_foo), + (exit_foo, ret_foo), + (call_foo, exit_foo), + + ]) + class CFGAssignmentMultiTest(CFGBaseTestCase): def test_assignment_multi_target(self): From effd87248cfba31318d61c72b3a0862e5313345a Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Tue, 20 Nov 2018 10:03:54 -0800 Subject: [PATCH 268/291] 133: Support for LHS functions and no comparison while tests --- .../example_inputs/while_func_comparator.py | 4 +- .../while_func_comparator_lhs.py | 6 +++ .../while_func_comparator_rhs.py | 6 +++ pyt/cfg/stmt_visitor.py | 18 +++++-- tests/cfg/cfg_test.py | 51 +++++++++++++++++-- 5 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 examples/example_inputs/while_func_comparator_lhs.py create mode 100644 examples/example_inputs/while_func_comparator_rhs.py diff --git a/examples/example_inputs/while_func_comparator.py b/examples/example_inputs/while_func_comparator.py index 6aafc2b6..8c775f72 100644 --- a/examples/example_inputs/while_func_comparator.py +++ b/examples/example_inputs/while_func_comparator.py @@ -1,6 +1,6 @@ def foo(): - return 6 + return True -while x < foo(): +while foo(): print(x) x += 1 diff --git 
a/examples/example_inputs/while_func_comparator_lhs.py b/examples/example_inputs/while_func_comparator_lhs.py new file mode 100644 index 00000000..1904e8e7 --- /dev/null +++ b/examples/example_inputs/while_func_comparator_lhs.py @@ -0,0 +1,6 @@ +def foo(): + return 6 + +while foo() > x: + print(x) + x += 1 diff --git a/examples/example_inputs/while_func_comparator_rhs.py b/examples/example_inputs/while_func_comparator_rhs.py new file mode 100644 index 00000000..6aafc2b6 --- /dev/null +++ b/examples/example_inputs/while_func_comparator_rhs.py @@ -0,0 +1,6 @@ +def foo(): + return 6 + +while x < foo(): + print(x) + x += 1 diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index ab45707a..965b24eb 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -563,7 +563,8 @@ def visit_For(self, node): def visit_While(self, node): label_visitor = LabelVisitor() - label_visitor.visit(node.test) + test = node.test # the test condition of the while loop + label_visitor.visit(test) while_node = self.append_node(Node( 'while ' + label_visitor.result + ':', @@ -571,11 +572,20 @@ def visit_While(self, node): path=self.filenames[-1] )) - for comp in node.test.comparators: - if isinstance(comp, ast.Call) and get_call_names_as_string(comp.func) in self.function_names: - last_node = self.visit(comp) + def process_comparator(comp_n): + if isinstance(comp_n, ast.Call) and get_call_names_as_string(comp_n.func) in self.function_names: + last_node = self.visit(comp_n) last_node.connect(while_node) + if isinstance(test, ast.Compare): + comparators = test.comparators + comparators.append(test.left) # quirk. 
See https://greentreesnakes.readthedocs.io/en/latest/nodes.html#Compare + + for comp in comparators: + process_comparator(comp) + else: # while foo(): + process_comparator(test) + return self.loop_node_skeleton(while_node, node) def add_blackbox_or_builtin_call(self, node, blackbox): # noqa: C901 diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index cfdc056a..bf47275e 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -684,7 +684,7 @@ def test_while_line_numbers(self): self.assertLineNumber(else_body_2, 6) self.assertLineNumber(next_stmt, 7) - def test_while_func_iterator(self): + def test_while_func_comparator(self): self.cfg_create_from_file('examples/example_inputs/while_func_comparator.py') self.assert_length(self.cfg.nodes, expected_length=9) @@ -699,6 +699,23 @@ def test_while_func_iterator(self): body_1 = 7 _exit = 8 + self.assertEqual(self.cfg.nodes[test].label, 'while foo():') + + def test_while_func_comparator_rhs(self): + self.cfg_create_from_file('examples/example_inputs/while_func_comparator_rhs.py') + + self.assert_length(self.cfg.nodes, expected_length=9) + + entry = 0 + test = 1 + entry_foo = 2 + ret_foo = 3 + exit_foo = 4 + call_foo = 5 + _print = 6 + body_1 = 7 + _exit = 8 + self.assertEqual(self.cfg.nodes[test].label, 'while x < foo():') self.assertInCfg([ @@ -707,13 +724,41 @@ def test_while_func_iterator(self): (_print, test), (_exit, test), (body_1, _print), - (test, body_1), (test, call_foo), (ret_foo, entry_foo), (exit_foo, ret_foo), - (call_foo, exit_foo), + (call_foo, exit_foo) + ]) + + def test_while_func_comparator_lhs(self): + self.cfg_create_from_file('examples/example_inputs/while_func_comparator_lhs.py') + self.assert_length(self.cfg.nodes, expected_length=9) + + entry = 0 + test = 1 + entry_foo = 2 + ret_foo = 3 + exit_foo = 4 + call_foo = 5 + _print = 6 + body_1 = 7 + _exit = 8 + + self.assertEqual(self.cfg.nodes[test].label, 'while foo() > x:') + + self.assertInCfg([ + (test, entry), + (entry_foo, test), 
+ (_print, test), + (_exit, test), + (body_1, _print), + (test, body_1), + (test, call_foo), + (ret_foo, entry_foo), + (exit_foo, ret_foo), + (call_foo, exit_foo) ]) From b52a8707b7fc598d1c7f59c785be0269d1b4a1d5 Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Tue, 20 Nov 2018 10:17:20 -0800 Subject: [PATCH 269/291] 133: Fix style and complexity issues for Travis.ci --- pyt/cfg/stmt_visitor.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 965b24eb..d3f8e2c8 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -555,15 +555,26 @@ def visit_For(self, node): path=self.filenames[-1] )) - if isinstance(node.iter, ast.Call) and get_call_names_as_string(node.iter.func) in self.function_names: - last_node = self.visit(node.iter) - last_node.connect(for_node) + self.process_loop_funcs(node.iter, for_node) return self.loop_node_skeleton(for_node, node) + def process_loop_funcs(self, comp_n, loop_node): + """ + If the loop test node contains function calls, it connects the loop node to the nodes of + those function calls. + + :param comp_n: The test node of a loop that may contain functions. 
+ :param loop_node: The loop node itself to connect to the new function nodes if any + :return: None + """ + if isinstance(comp_n, ast.Call) and get_call_names_as_string(comp_n.func) in self.function_names: + last_node = self.visit(comp_n) + last_node.connect(loop_node) + def visit_While(self, node): label_visitor = LabelVisitor() - test = node.test # the test condition of the while loop + test = node.test # the test condition of the while loop label_visitor.visit(test) while_node = self.append_node(Node( @@ -572,19 +583,14 @@ def visit_While(self, node): path=self.filenames[-1] )) - def process_comparator(comp_n): - if isinstance(comp_n, ast.Call) and get_call_names_as_string(comp_n.func) in self.function_names: - last_node = self.visit(comp_n) - last_node.connect(while_node) - if isinstance(test, ast.Compare): comparators = test.comparators - comparators.append(test.left) # quirk. See https://greentreesnakes.readthedocs.io/en/latest/nodes.html#Compare + comparators.append(test.left) # quirk. 
See https://greentreesnakes.readthedocs.io/en/latest/nodes.html#Compare for comp in comparators: - process_comparator(comp) - else: # while foo(): - process_comparator(test) + self.process_loop_funcs(comp, while_node) + else: # while foo(): + self.process_loop_funcs(test, while_node) return self.loop_node_skeleton(while_node, node) From 6d25eae6ce969e1930846e6714445ee5ca870a47 Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Tue, 20 Nov 2018 10:21:56 -0800 Subject: [PATCH 270/291] 133: Finished tests --- tests/cfg/cfg_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/cfg/cfg_test.py b/tests/cfg/cfg_test.py index bf47275e..3af37942 100644 --- a/tests/cfg/cfg_test.py +++ b/tests/cfg/cfg_test.py @@ -701,6 +701,19 @@ def test_while_func_comparator(self): self.assertEqual(self.cfg.nodes[test].label, 'while foo():') + self.assertInCfg([ + (test, entry), + (entry_foo, test), + (_print, test), + (_exit, test), + (body_1, _print), + (test, body_1), + (test, call_foo), + (ret_foo, entry_foo), + (exit_foo, ret_foo), + (call_foo, exit_foo) + ]) + def test_while_func_comparator_rhs(self): self.cfg_create_from_file('examples/example_inputs/while_func_comparator_rhs.py') From 9cb0b567c8cd0c66bcf9ac87ad1425d47a75b908 Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Wed, 21 Nov 2018 10:12:19 -0800 Subject: [PATCH 271/291] 133: Avoid mutating node.test.comparators --- pyt/cfg/stmt_visitor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index d3f8e2c8..3b9d5f48 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -584,10 +584,10 @@ def visit_While(self, node): )) if isinstance(test, ast.Compare): - comparators = test.comparators - comparators.append(test.left) # quirk. See https://greentreesnakes.readthedocs.io/en/latest/nodes.html#Compare + # quirk. 
See https://greentreesnakes.readthedocs.io/en/latest/nodes.html#Compare + self.process_loop_funcs(test.left, while_node) - for comp in comparators: + for comp in test.comparators: self.process_loop_funcs(comp, while_node) else: # while foo(): self.process_loop_funcs(test, while_node) From aff6b6c166e73c7c4ad4f9a1d863a6fad28cc785 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 24 Nov 2018 14:15:48 -0800 Subject: [PATCH 272/291] [spelling] constaint -> constraint --- pyt/vulnerabilities/vulnerabilities.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 0daca2cd..3087c1bd 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -359,12 +359,12 @@ def how_vulnerable( def get_tainted_node_in_sink_args( sink_args, - nodes_in_constaint + nodes_in_constraint ): if not sink_args: return None # Starts with the node closest to the sink - for node in nodes_in_constaint: + for node in nodes_in_constraint: if node.left_hand_side in sink_args: return node @@ -398,11 +398,10 @@ def get_vulnerability( Returns: A Vulnerability if it exists, else None """ - nodes_in_constaint = [secondary for secondary in reversed(source.secondary_nodes) + nodes_in_constraint = [secondary for secondary in reversed(source.secondary_nodes) if lattice.in_constraint(secondary, sink.cfg_node)] - nodes_in_constaint.append(source.cfg_node) - + nodes_in_constraint.append(source.cfg_node) if sink.trigger.all_arguments_propagate_taint: sink_args = get_sink_args(sink.cfg_node) else: @@ -410,7 +409,7 @@ def get_vulnerability( tainted_node_in_sink_arg = get_tainted_node_in_sink_args( sink_args, - nodes_in_constaint, + nodes_in_constraint, ) if tainted_node_in_sink_arg: From 6641078459411ff7b4c525c5aa3908d39ed539c7 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 24 Nov 2018 14:40:53 -0800 Subject: [PATCH 273/291] Added links --- CHANGELOG.md | 26 
+++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 928f88cf..9c68d4c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,35 +26,47 @@ If you love PyT, please star our project on GitHub to show your support! :star: ##### November 1st, 2018 #### :boom: Breaking Changes -* Changed trigger file format when specifying specific tainted args ([#182]) +* Changed trigger file format when specifying specific tainted args ([#182], thanks [@bcaller]) #### :tada: New Features -* Function calls such as `list.append` and `dict.update` now propagate taint to the list or dict ([#181]) +* Function calls such as `list.append` and `dict.update` now propagate taint to the list or dict ([#181], thanks [@bcaller]) #### :bug: Bugfixes -* IfExp (or ternary) expression handling improved ([#179]) +* IfExp (or ternary) expression handling improved ([#179], thanks [@bcaller]) + +[#179]: https://github.com/python-security/pyt/pull/179 +[#181]: https://github.com/python-security/pyt/pull/181 +[#182]: https://github.com/python-security/pyt/pull/182 + # 0.40 ##### September 11th, 2018 #### :mega: Release Highlights -* Logging changes. Logging verbosity can be changed with `-v` to `-vvv` ([#172]) +* Logging changes. 
Logging verbosity can be changed with `-v` to `-vvv` ([#172], thanks [@bcaller]) #### :boom: Breaking Changes * Removed `--trim` option ([#169]) #### :tada: New Features -* Added `--only-unsanitised` flag to not print sanitised vulnerabilities ([#172]) +* Added `--only-unsanitised` flag to not print sanitised vulnerabilities ([#172], thanks [@bcaller]) #### :bug: Bugfixes -* Recursive functions don't cause `RecursionError` ([#173]) -* Handling of chained functions improved ([#171]) +* Recursive functions don't cause `RecursionError` ([#173], thanks [@bcaller]) +* Handling of chained functions improved ([#171], thanks [@bcaller]) + +[#169]: https://github.com/python-security/pyt/pull/169 +[#171]: https://github.com/python-security/pyt/pull/171 +[#172]: https://github.com/python-security/pyt/pull/172 +[#173]: https://github.com/python-security/pyt/pull/173 + # 0.39 ##### August 21st, 2018 ... + # 0.38 ##### August 2nd, 2018 From 52cac9d376448003737e8ffdc7560cf99b4d1758 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 24 Nov 2018 14:47:10 -0800 Subject: [PATCH 274/291] Added most recent PR details --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c68d4c1..36909cfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,17 @@ If you love PyT, please star our project on GitHub to show your support! :star: [#xxxx]: https://github.com/python-security/pyt/pull/xxxx [@xxxx]: https://github.com/xxxx --> + +# Unreleased + +#### :tada: New Features + +* Added visting functions in the tests of `while` nodes ([#186], thanks [@adrianbn]) + +[@adrianbn]: https://github.com/adrianbn +[#186]: https://github.com/python-security/pyt/pull/186 + + # 0.42 ##### November 1st, 2018 @@ -156,6 +167,7 @@ If you love PyT, please star our project on GitHub to show your support! 
:star: [#152]: https://github.com/python-security/pyt/pull/152 [#156]: https://github.com/python-security/pyt/pull/156 + # 0.34 ##### April 24th, 2018 From 95e2ac3bdb02647f068f8fdc8a2aaad1fe67e0da Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 24 Nov 2018 15:02:09 -0800 Subject: [PATCH 275/291] Added older `0.39` version details --- CHANGELOG.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36909cfc..3d626450 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,7 +75,27 @@ If you love PyT, please star our project on GitHub to show your support! :star: # 0.39 ##### August 21st, 2018 -... +#### :tada: New Features + +* Added handling of assignment unpacking e.g. `a, b, c = d` ([#164], thanks [@bcaller]) +* Made file loading and vulnerability order deterministic ([#165], thanks [@bcaller]) + +#### :bug: Bugfixes +* Fixed VarsVisitor RuntimeError on code like `f(g(a)(b)(c))` ([#163], thanks [@bcaller]) + +#### :telescope: Precision + +* Taint propagates from methods of tainted objects ([#167], thanks [@bcaller]) + +#### :snake: Miscellaneous + +* Cleaned test cases of extraneous reassignments ([#166], thanks [@bcaller]) + +[#163]: https://github.com/python-security/pyt/pull/163 +[#164]: https://github.com/python-security/pyt/pull/164 +[#165]: https://github.com/python-security/pyt/pull/165 +[#166]: https://github.com/python-security/pyt/pull/166 +[#167]: https://github.com/python-security/pyt/pull/167 # 0.38 From 1b61080cbf83979194e32434396d72f122a87650 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 25 Nov 2018 14:49:17 -0800 Subject: [PATCH 276/291] :bug: Fix E128 Flake8 Travis Failure: `continuation line under-indented for visual indent` --- pyt/vulnerabilities/vulnerabilities.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 3087c1bd..86b3a402 100644 ---
a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -398,9 +398,14 @@ def get_vulnerability( Returns: A Vulnerability if it exists, else None """ - nodes_in_constraint = [secondary for secondary in reversed(source.secondary_nodes) - if lattice.in_constraint(secondary, - sink.cfg_node)] + nodes_in_constraint = [ + secondary + for secondary in reversed(source.secondary_nodes) + if lattice.in_constraint( + secondary, + sink.cfg_node + ) + ] nodes_in_constraint.append(source.cfg_node) if sink.trigger.all_arguments_propagate_taint: sink_args = get_sink_args(sink.cfg_node) From c0ef67500dff19f07a0251863d7b83dc6a23bf78 Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Sat, 8 Dec 2018 15:16:44 +0100 Subject: [PATCH 277/291] 128: Allow the user to cancel interactive mode --- pyt/vulnerabilities/vulnerabilities.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index 86b3a402..a12d662c 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -336,11 +336,15 @@ def how_vulnerable( return VulnerabilityType.FALSE elif interactive: user_says = input( - 'Is the return value of {} with tainted argument "{}" vulnerable? (Y/n)'.format( + 'Is the return value of {} with tainted argument "{}" vulnerable? 
([Y]es/[N]o/[S]top)'.format( current_node.label, chain[i - 1].left_hand_side ) ).lower() + if user_says.startswith('s'): + interactive = False + vuln_deets['unknown_assignment'] = current_node + return VulnerabilityType.UNKNOWN if user_says.startswith('n'): blackbox_mapping['does_not_propagate'].append(current_node.func_name) return VulnerabilityType.FALSE From 986532afdf4431610d9307c4d00a469cb3faac6d Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Sun, 9 Dec 2018 01:04:52 +0100 Subject: [PATCH 278/291] 128: Stop asking for all chains --- pyt/vulnerabilities/vulnerabilities.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pyt/vulnerabilities/vulnerabilities.py b/pyt/vulnerabilities/vulnerabilities.py index a12d662c..2dba0078 100644 --- a/pyt/vulnerabilities/vulnerabilities.py +++ b/pyt/vulnerabilities/vulnerabilities.py @@ -327,16 +327,16 @@ def how_vulnerable( if current_node in sanitiser_nodes: vuln_deets['sanitiser'] = current_node vuln_deets['confident'] = True - return VulnerabilityType.SANITISED + return VulnerabilityType.SANITISED, interactive if isinstance(current_node, BBorBInode): if current_node.func_name in blackbox_mapping['propagates']: continue elif current_node.func_name in blackbox_mapping['does_not_propagate']: - return VulnerabilityType.FALSE + return VulnerabilityType.FALSE, interactive elif interactive: user_says = input( - 'Is the return value of {} with tainted argument "{}" vulnerable? ([Y]es/[N]o/[S]top)'.format( + 'Is the return value of {} with tainted argument "{}" vulnerable? 
([Y]es/[N]o/[S]top asking)'.format( current_node.label, chain[i - 1].left_hand_side ) @@ -344,21 +344,21 @@ def how_vulnerable( if user_says.startswith('s'): interactive = False vuln_deets['unknown_assignment'] = current_node - return VulnerabilityType.UNKNOWN + return VulnerabilityType.UNKNOWN, interactive if user_says.startswith('n'): blackbox_mapping['does_not_propagate'].append(current_node.func_name) - return VulnerabilityType.FALSE + return VulnerabilityType.FALSE, interactive blackbox_mapping['propagates'].append(current_node.func_name) else: vuln_deets['unknown_assignment'] = current_node - return VulnerabilityType.UNKNOWN + return VulnerabilityType.UNKNOWN, interactive if potential_sanitiser: vuln_deets['sanitiser'] = potential_sanitiser vuln_deets['confident'] = False - return VulnerabilityType.SANITISED + return VulnerabilityType.SANITISED, interactive - return VulnerabilityType.TRUE + return VulnerabilityType.TRUE, interactive def get_tainted_node_in_sink_args( @@ -443,12 +443,13 @@ def get_vulnerability( cfg.nodes, lattice ) + for chain in get_vulnerability_chains( source.cfg_node, sink.cfg_node, def_use ): - vulnerability_type = how_vulnerable( + vulnerability_type, interactive = how_vulnerable( chain, blackbox_mapping, sanitiser_nodes, @@ -462,9 +463,9 @@ def get_vulnerability( vuln_deets['reassignment_nodes'] = chain - return vuln_factory(vulnerability_type)(**vuln_deets) + return vuln_factory(vulnerability_type)(**vuln_deets), interactive - return None + return None, interactive def find_vulnerabilities_in_cfg( @@ -495,7 +496,7 @@ def find_vulnerabilities_in_cfg( ) for sink in triggers.sinks: for source in triggers.sources: - vulnerability = get_vulnerability( + vulnerability, interactive = get_vulnerability( source, sink, triggers, From 1332b7aa5942d63e8a82a4037682daa9dc206e9f Mon Sep 17 00:00:00 2001 From: bcaller Date: Wed, 16 Jan 2019 16:52:38 +0000 Subject: [PATCH 279/291] Test running pyt in a python 3.7 env It should just work. 
There were no AST changes. Also removes the unnecessary whitelist_external from tox.ini. Note: pyt can be run in a py36 environment on code targeting 2.7-3.8. There is no requirement for pyt to be run under the same environment as your code is intended to run in. Running pyt under 3.8 will require some further work due to the change from ast.Str to ast.Constant: https://github.com/python/cpython/pull/9445/files https://docs.python.org/dev/whatsnew/3.8.html#deprecated https://bugs.python.org/issue32892 --- .travis.yml | 2 ++ tox.ini | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0638ada6..fe655eef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,8 @@ language: python +dist: xenial python: - "3.6" + - "3.7" install: - pip install codeclimate-test-reporter 'coverage>=4.0,<4.4' flake8 before_script: diff --git a/tox.ini b/tox.ini index c9b4b248..424c51ef 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,11 @@ [tox] -envlist = py36,cover,lint +envlist = py36,py37,cover,lint [testenv] commands = python -m tests [testenv:cover] -whitelist_externals = coverage deps = coverage>=4.0,<4.4 commands = From 40c1f2482dbc54b8846f22aae4082ebec6af5683 Mon Sep 17 00:00:00 2001 From: wchresta <34962284+wchresta@users.noreply.github.com> Date: Fri, 22 Mar 2019 17:01:05 -0400 Subject: [PATCH 280/291] Resolve aliases for black box and built-in function calls.
* Allow trigger words to be fully qualified to reduce false positives --- .../command_injection_with_aliases.py | 12 +++++++++ pyt/cfg/alias_helper.py | 19 ++++++++++++++ pyt/cfg/stmt_visitor.py | 26 ++++++++++++++++--- .../all_trigger_words.pyt | 6 ++--- tests/main_test.py | 4 +-- tests/vulnerabilities/vulnerabilities_test.py | 15 +++++++++++ 6 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 examples/vulnerable_code/command_injection_with_aliases.py diff --git a/examples/vulnerable_code/command_injection_with_aliases.py b/examples/vulnerable_code/command_injection_with_aliases.py new file mode 100644 index 00000000..309e5268 --- /dev/null +++ b/examples/vulnerable_code/command_injection_with_aliases.py @@ -0,0 +1,12 @@ +import os +import os as myos +from os import system +from os import system as mysystem +from subprocess import call as mycall, Popen as mypopen + +os.system("ls") +myos.system("ls") +system("ls") +mysystem("ls") +mycall("ls") +mypopen("ls") diff --git a/pyt/cfg/alias_helper.py b/pyt/cfg/alias_helper.py index a1c83ab0..9de6d058 100644 --- a/pyt/cfg/alias_helper.py +++ b/pyt/cfg/alias_helper.py @@ -74,3 +74,22 @@ def retrieve_import_alias_mapping(names_list): if alias.asname: import_alias_names[alias.asname] = alias.name return import_alias_names + + +def fully_qualify_alias_labels(label, aliases): + """Replace any aliases in label with the fully qualified name. + + Args: + label -- A label : str representing a name (e.g. myos.system) + aliases -- A dict of {alias: real_name} (e.g. 
{'myos': 'os'}) + + >>> fully_qualify_alias_labels('myos.mycall', {'myos':'os'}) + 'os.mycall' + """ + for alias, full_name in aliases.items(): + if label == alias: + return full_name + if label.startswith(alias+'.'): + return full_name + label[len(alias):] + return label + diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 3b9d5f48..e008b096 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -9,7 +9,8 @@ handle_aliases_in_init_files, handle_fdid_aliases, not_as_alias_handler, - retrieve_import_alias_mapping + retrieve_import_alias_mapping, + fully_qualify_alias_labels ) from ..core.ast_helper import ( generate_ast, @@ -61,6 +62,7 @@ class StmtVisitor(ast.NodeVisitor): def __init__(self, allow_local_directory_imports=True): self._allow_local_modules = allow_local_directory_imports + self.bb_or_bi_aliases = {} super().__init__() def visit_Module(self, node): @@ -624,6 +626,10 @@ def add_blackbox_or_builtin_call(self, node, blackbox): # noqa: C901 call_function_label = call_label_visitor.result[:call_label_visitor.result.find('(')] + # Check if function call matches a blackbox/built-in alias and if so, resolve it + # This resolves aliases like "from os import system as mysys" as: mysys -> os.system + call_function_label = fully_qualify_alias_labels(call_function_label, self.bb_or_bi_aliases) + # Create e.g. 
~call_1 = ret_func_foo LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index) RHS = 'ret_' + call_function_label + '(' @@ -810,7 +816,6 @@ def add_module( # noqa: C901 module_path = module[1] parent_definitions = self.module_definitions_stack[-1] - # The only place the import_alias_mapping is updated parent_definitions.import_alias_mapping.update(import_alias_mapping) parent_definitions.import_names = local_names @@ -1052,7 +1057,13 @@ def visit_Import(self, node): retrieve_import_alias_mapping(node.names) ) for alias in node.names: - if alias.name not in uninspectable_modules: + if alias.name in uninspectable_modules: + # The module is uninspectable (so blackbox or built-in). If it has an alias, we remember + # the alias so we can do fully qualified name resolution for blackbox- and built-in trigger words + # e.g. we want a call to "os.system" be recognised, even if we do "import os as myos" + if alias.asname is not None and alias.asname != alias.name: + self.bb_or_bi_aliases[alias.asname] = alias.name + else: log.warn("Cannot inspect module %s", alias.name) uninspectable_modules.add(alias.name) # Don't repeatedly warn about this return IgnoredNode() @@ -1094,7 +1105,14 @@ def visit_ImportFrom(self, node): retrieve_import_alias_mapping(node.names), from_from=True ) - if node.module not in uninspectable_modules: + + if node.module in uninspectable_modules: + # Remember aliases for blackboxed and built-in imports such that we can label them fully qualified + # e.g. 
we want a call to "os.system" be recognised, even if we do "from os import system" + # from os import system as mysystem -> module=os, name=system, asname=mysystem + for name in node.names: + self.bb_or_bi_aliases[name.asname or name.name] = "{}.{}".format(node.module, name.name) + else: log.warn("Cannot inspect module %s", node.module) uninspectable_modules.add(node.module) return IgnoredNode() diff --git a/pyt/vulnerability_definitions/all_trigger_words.pyt b/pyt/vulnerability_definitions/all_trigger_words.pyt index 5642db5c..615e86b6 100644 --- a/pyt/vulnerability_definitions/all_trigger_words.pyt +++ b/pyt/vulnerability_definitions/all_trigger_words.pyt @@ -31,9 +31,10 @@ ] }, "execute(": {}, - "system(": {}, + "os.system(": {}, "filter(": {}, "subprocess.call(": {}, + "subprocess.Popen(": {}, "render_template(": {}, "set_cookie(": {}, "redirect(": {}, @@ -41,7 +42,6 @@ "flash(": {}, "jsonify(": {}, "render(": {}, - "render_to_response(": {}, - "Popen(": {} + "render_to_response(": {} } } diff --git a/tests/main_test.py b/tests/main_test.py index 561d8bd1..fef1e124 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -108,11 +108,11 @@ def test_targets_with_recursive(self): excluded_files = "" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 33) + self.assertEqual(len(included_files), 34) def test_targets_with_recursive_and_excluded(self): targets = ["examples/vulnerable_code/"] excluded_files = "inter_command_injection.py" included_files = discover_files(targets, excluded_files, True) - self.assertEqual(len(included_files), 32) + self.assertEqual(len(included_files), 33) diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index 5a28aa03..fe2fa64c 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -482,6 +482,21 @@ def test_list_append_taints_list(self): ) 
self.assert_length(vulnerabilities, expected_length=1) + def test_import_bb_or_bi_with_alias(self): + self.cfg_create_from_file('examples/vulnerable_code/command_injection_with_aliases.py') + + EXPECTED = ['Entry module', + "~call_1 = ret_os.system('ls')", + "~call_2 = ret_os.system('ls')", + "~call_3 = ret_os.system('ls')", + "~call_4 = ret_os.system('ls')", + "~call_5 = ret_subprocess.call('ls')", + "~call_6 = ret_subprocess.Popen('ls')", + 'Exit module' + ] + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + class EngineDjangoTest(VulnerabilitiesBaseTestCase): def run_analysis(self, path): From 57448a0c7761ee7cc054b1630ec1e66e5581f850 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 23 Mar 2019 12:33:59 -0700 Subject: [PATCH 281/291] :wave: Add `no longer maintained` section --- README.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.rst b/README.rst index 025a3705..a84df88d 100644 --- a/README.rst +++ b/README.rst @@ -19,6 +19,26 @@ .. image:: https://img.shields.io/badge/Donate-Charity-orange.svg :target: https://www.againstmalaria.com/donation.aspx +This project is no longer maintained +==================================== + +`Pyre`_ from Facebook is an amazing project that has a bright future and many smart people working on it. +I would suggest, if you don't know that much about program analysis, that you understand how PyT works before diving into Pyre. Along with the `README's in most directories`_, there is the original `Master's Thesis`_ and `some slides`_. +With that said, I am happy to review pull requests and give you write permissions if you make more than a few. + +There were a lot of great contributors to this project, I plan on working on other projects like `detect-secrets`_ and others (e.g. Pyre eventually) in the future if you'd like to work together more :) + +If you are a security engineer with e.g. 
a Python codebase without type annotations, that Pyre won't handle, I would suggest you replace your sinks with a secure wrapper (something like `defusedxml`_), and alert off any uses of the standard sink. You can use `Bandit`_ to do this but you will have to trim it a lot, due to the high false-positive rate. + +.. _Pyre: https://github.com/facebook/pyre-check +.. _README's in most directories: https://github.com/python-security/pyt/tree/master/pyt#how-it-works +.. _Master's Thesis: https://projekter.aau.dk/projekter/files/239563289/final.pdf +.. _some slides: https://docs.google.com/presentation/d/1JfAykAxR0DcJwwGfHmhrz1RhhKqYsnt5x_GY8CbTp7s +.. _detect-secrets: https://github.com/Yelp/detect-secrets/blob/master/CHANGELOG.md#whats-new +.. _defusedxml: https://pypi.org/project/defusedxml/ +.. _Bandit: https://github.com/PyCQA/bandit + + Python Taint ============ From 21e6027efb94669c6374615350373de850e623a4 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 23 Mar 2019 12:37:53 -0700 Subject: [PATCH 282/291] Bold important text --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index a84df88d..831a392d 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ This project is no longer maintained `Pyre`_ from Facebook is an amazing project that has a bright future and many smart people working on it. I would suggest, if you don't know that much about program analysis, that you understand how PyT works before diving into Pyre. Along with the `README's in most directories`_, there is the original `Master's Thesis`_ and `some slides`_. -With that said, I am happy to review pull requests and give you write permissions if you make more than a few. +With that said, **I am happy to review pull requests and give you write permissions if you make more than a few.** There were a lot of great contributors to this project, I plan on working on other projects like `detect-secrets`_ and others (e.g. 
Pyre eventually) in the future if you'd like to work together more :) From 1ff3901b89c0db437a6b3b613f3c262be10cd61c Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 23 Mar 2019 12:38:55 -0700 Subject: [PATCH 283/291] [grammar] is -> are --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 831a392d..852a8b2d 100644 --- a/README.rst +++ b/README.rst @@ -23,7 +23,7 @@ This project is no longer maintained ==================================== `Pyre`_ from Facebook is an amazing project that has a bright future and many smart people working on it. -I would suggest, if you don't know that much about program analysis, that you understand how PyT works before diving into Pyre. Along with the `README's in most directories`_, there is the original `Master's Thesis`_ and `some slides`_. +I would suggest, if you don't know that much about program analysis, that you understand how PyT works before diving into Pyre. Along with the `README's in most directories`_, there are the original `Master's Thesis`_ and `some slides`_. With that said, **I am happy to review pull requests and give you write permissions if you make more than a few.** There were a lot of great contributors to this project, I plan on working on other projects like `detect-secrets`_ and others (e.g. Pyre eventually) in the future if you'd like to work together more :) From 022476a014ed3a10ff04dd7d2450192e676c9100 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sat, 23 Mar 2019 12:39:53 -0700 Subject: [PATCH 284/291] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 852a8b2d..32dba718 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ With that said, **I am happy to review pull requests and give you write permissi There were a lot of great contributors to this project, I plan on working on other projects like `detect-secrets`_ and others (e.g. 
Pyre eventually) in the future if you'd like to work together more :) -If you are a security engineer with e.g. a Python codebase without type annotations, that Pyre won't handle, I would suggest you replace your sinks with a secure wrapper (something like `defusedxml`_), and alert off any uses of the standard sink. You can use `Bandit`_ to do this but you will have to trim it a lot, due to the high false-positive rate. +If you are a security engineer with e.g. a Python codebase without type annotations, that Pyre won't handle, I would suggest you replace your sinks with a secure wrapper (something like `defusedxml`_), and alert off any uses of the standard sink. You can use `Bandit`_ to do this since dataflow analysis is not required, but you will have to trim it a lot, due to the high false-positive rate. .. _Pyre: https://github.com/facebook/pyre-check .. _README's in most directories: https://github.com/python-security/pyt/tree/master/pyt#how-it-works From 36bd520cf3e52e218aa79d23cb3113a934551bc6 Mon Sep 17 00:00:00 2001 From: wchresta <34962284+wchresta@users.noreply.github.com> Date: Sat, 23 Mar 2019 16:30:01 -0400 Subject: [PATCH 285/291] Use import_alias_mapping for blackbox and built-in aliases, as well * This will give fully qualified names for blackboxes like flask * Improve readability by using keyword arguments --- .../command_injection_with_aliases.py | 31 +++- pyt/cfg/stmt_visitor.py | 149 +++++++++--------- .../vulnerabilities_across_files_test.py | 2 +- .../vulnerabilities_base_test_case.py | 4 +- tests/vulnerabilities/vulnerabilities_test.py | 58 +++---- 5 files changed, 132 insertions(+), 112 deletions(-) diff --git a/examples/vulnerable_code/command_injection_with_aliases.py b/examples/vulnerable_code/command_injection_with_aliases.py index 309e5268..4e409c52 100644 --- a/examples/vulnerable_code/command_injection_with_aliases.py +++ b/examples/vulnerable_code/command_injection_with_aliases.py @@ -4,9 +4,28 @@ from os import system as mysystem 
from subprocess import call as mycall, Popen as mypopen -os.system("ls") -myos.system("ls") -system("ls") -mysystem("ls") -mycall("ls") -mypopen("ls") +from flask import Flask, render_template, request + +app = Flask(__name__) + + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + command = 'echo ' + param + ' >> ' + 'menu.txt' + + os.system(command) + myos.system(command) + system(command) + mysystem(command) + mycall(command) + mypopen(command) + + with open('menu.txt', 'r') as f: + menu_ctx = f.read() + + return render_template('command_injection.html', menu=menu_ctx) + + +if __name__ == '__main__': + app.run(debug=True) diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index e008b096..0a35c566 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -62,7 +62,6 @@ class StmtVisitor(ast.NodeVisitor): def __init__(self, allow_local_directory_imports=True): self._allow_local_modules = allow_local_directory_imports - self.bb_or_bi_aliases = {} super().__init__() def visit_Module(self, node): @@ -628,7 +627,8 @@ def add_blackbox_or_builtin_call(self, node, blackbox): # noqa: C901 # Check if function call matches a blackbox/built-in alias and if so, resolve it # This resolves aliases like "from os import system as mysys" as: mysys -> os.system - call_function_label = fully_qualify_alias_labels(call_function_label, self.bb_or_bi_aliases) + local_definitions = self.module_definitions_stack[-1] + call_function_label = fully_qualify_alias_labels(call_function_label, local_definitions.import_alias_mapping) # Create e.g. 
~call_1 = ret_func_foo LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index) @@ -924,10 +924,10 @@ def from_directory_import( if init_exists and not skip_init: package_name = os.path.split(module_path)[1] return self.add_module( - (module[0], init_file_location), - package_name, - local_names, - import_alias_mapping, + module=(module[0], init_file_location), + module_or_package_name=package_name, + local_names=local_names, + import_alias_mapping=import_alias_mapping, is_init=True, from_from=True ) @@ -937,10 +937,10 @@ def from_directory_import( new_init_file_location = os.path.join(full_name, '__init__.py') if os.path.isfile(new_init_file_location): self.add_module( - (real_name, new_init_file_location), - real_name, - local_names, - import_alias_mapping, + module=(real_name, new_init_file_location), + module_or_package_name=real_name, + local_names=local_names, + import_alias_mapping=import_alias_mapping, is_init=True, from_from=True, from_fdid=True @@ -950,10 +950,10 @@ def from_directory_import( else: file_module = (real_name, full_name + '.py') self.add_module( - file_module, - real_name, - local_names, - import_alias_mapping, + module=file_module, + module_or_package_name=real_name, + local_names=local_names, + import_alias_mapping=import_alias_mapping, from_from=True ) return IgnoredNode() @@ -964,10 +964,10 @@ def import_package(self, module, module_name, local_name, import_alias_mapping): init_exists = os.path.isfile(init_file_location) if init_exists: return self.add_module( - (module[0], init_file_location), - module_name, - local_name, - import_alias_mapping, + module=(module[0], init_file_location), + module_or_package_name=module_name, + local_names=local_name, + import_alias_mapping=import_alias_mapping, is_init=True ) else: @@ -1010,10 +1010,10 @@ def handle_relative_import(self, node): # Is it a file? 
if name_with_dir.endswith('.py'): return self.add_module( - (node.module, name_with_dir), - None, - as_alias_handler(node.names), - retrieve_import_alias_mapping(node.names), + module=(node.module, name_with_dir), + module_or_package_name=None, + local_names=as_alias_handler(node.names), + import_alias_mapping=retrieve_import_alias_mapping(node.names), from_from=True ) return self.from_directory_import( @@ -1036,10 +1036,10 @@ def visit_Import(self, node): retrieve_import_alias_mapping(node.names) ) return self.add_module( - module, - name.name, - name.asname, - retrieve_import_alias_mapping(node.names) + module=module, + module_or_package_name=name.name, + local_names=name.asname, + import_alias_mapping=retrieve_import_alias_mapping(node.names) ) for module in self.project_modules: if name.name == module[0]: @@ -1051,20 +1051,20 @@ def visit_Import(self, node): retrieve_import_alias_mapping(node.names) ) return self.add_module( - module, - name.name, - name.asname, - retrieve_import_alias_mapping(node.names) + module=module, + module_or_package_name=name.name, + local_names=name.asname, + import_alias_mapping=retrieve_import_alias_mapping(node.names) ) for alias in node.names: - if alias.name in uninspectable_modules: - # The module is uninspectable (so blackbox or built-in). If it has an alias, we remember - # the alias so we can do fully qualified name resolution for blackbox- and built-in trigger words - # e.g. we want a call to "os.system" be recognised, even if we do "import os as myos" - if alias.asname is not None and alias.asname != alias.name: - self.bb_or_bi_aliases[alias.asname] = alias.name - else: - log.warn("Cannot inspect module %s", alias.name) + # The module is uninspectable (so blackbox or built-in). If it has an alias, we remember + # the alias so we can do fully qualified name resolution for blackbox- and built-in trigger words + # e.g. 
we want a call to "os.system" be recognised, even if we do "import os as myos" + if alias.asname is not None and alias.asname != alias.name: + local_definitions = self.module_definitions_stack[-1] + local_definitions.import_alias_mapping[name.asname] = alias.name + if alias.name not in uninspectable_modules: + log.warning("Cannot inspect module %s", alias.name) uninspectable_modules.add(alias.name) # Don't repeatedly warn about this return IgnoredNode() @@ -1072,47 +1072,48 @@ def visit_ImportFrom(self, node): # Is it relative? if node.level > 0: return self.handle_relative_import(node) - else: - for module in self.local_modules: - if node.module == module[0]: - if os.path.isdir(module[1]): - return self.from_directory_import( - module, - not_as_alias_handler(node.names), - as_alias_handler(node.names) - ) - return self.add_module( + # not relative + for module in self.local_modules: + if node.module == module[0]: + if os.path.isdir(module[1]): + return self.from_directory_import( module, - None, - as_alias_handler(node.names), - retrieve_import_alias_mapping(node.names), - from_from=True + not_as_alias_handler(node.names), + as_alias_handler(node.names) ) - for module in self.project_modules: - name = module[0] - if node.module == name: - if os.path.isdir(module[1]): - return self.from_directory_import( - module, - not_as_alias_handler(node.names), - as_alias_handler(node.names), - retrieve_import_alias_mapping(node.names) - ) - return self.add_module( + return self.add_module( + module=module, + module_or_package_name=None, + local_names=as_alias_handler(node.names), + import_alias_mapping=retrieve_import_alias_mapping(node.names), + from_from=True + ) + for module in self.project_modules: + name = module[0] + if node.module == name: + if os.path.isdir(module[1]): + return self.from_directory_import( module, - None, + not_as_alias_handler(node.names), as_alias_handler(node.names), - retrieve_import_alias_mapping(node.names), - from_from=True + 
retrieve_import_alias_mapping(node.names) ) + return self.add_module( + module=module, + module_or_package_name=None, + local_names=as_alias_handler(node.names), + import_alias_mapping=retrieve_import_alias_mapping(node.names), + from_from=True + ) - if node.module in uninspectable_modules: - # Remember aliases for blackboxed and built-in imports such that we can label them fully qualified - # e.g. we want a call to "os.system" be recognised, even if we do "from os import system" - # from os import system as mysystem -> module=os, name=system, asname=mysystem - for name in node.names: - self.bb_or_bi_aliases[name.asname or name.name] = "{}.{}".format(node.module, name.name) - else: - log.warn("Cannot inspect module %s", node.module) + # Remember aliases for uninspecatble modules such that we can label them fully qualified + # e.g. we want a call to "os.system" be recognised, even if we do "from os import system" + # from os import system as mysystem -> module=os, name=system, asname=mysystem + for name in node.names: + local_definitions = self.module_definitions_stack[-1] + local_definitions.import_alias_mapping[name.asname or name.name] = "{}.{}".format(node.module, name.name) + + if node.module not in uninspectable_modules: + log.warning("Cannot inspect module %s", node.module) uninspectable_modules.add(node.module) return IgnoredNode() diff --git a/tests/vulnerabilities/vulnerabilities_across_files_test.py b/tests/vulnerabilities/vulnerabilities_across_files_test.py index bd63b190..d7b8f0d1 100644 --- a/tests/vulnerabilities/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities/vulnerabilities_across_files_test.py @@ -62,7 +62,7 @@ def test_blackbox_library_call(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code_across_files/blackbox_library_call.py > User input at line 12, source "request.args.get(": - ~call_1 = ret_request.args.get('suggestion') + ~call_1 = ret_flask.request.args.get('suggestion') Reassigned in: File: 
examples/vulnerable_code_across_files/blackbox_library_call.py > Line 12: param = ~call_1 diff --git a/tests/vulnerabilities/vulnerabilities_base_test_case.py b/tests/vulnerabilities/vulnerabilities_base_test_case.py index c21f81ed..a7e86121 100644 --- a/tests/vulnerabilities/vulnerabilities_base_test_case.py +++ b/tests/vulnerabilities/vulnerabilities_base_test_case.py @@ -11,7 +11,7 @@ def string_compare_alpha(self, output, expected_string): def assertAlphaEqual(self, output, expected_string): self.assertEqual( - [char for char in output if char.isalpha()], - [char for char in expected_string if char.isalpha()] + ''.join(char for char in output if char.isalpha()), + ''.join(char for char in expected_string if char.isalpha()) ) return True diff --git a/tests/vulnerabilities/vulnerabilities_test.py b/tests/vulnerabilities/vulnerabilities_test.py index fe2fa64c..893ec70a 100644 --- a/tests/vulnerabilities/vulnerabilities_test.py +++ b/tests/vulnerabilities/vulnerabilities_test.py @@ -150,7 +150,7 @@ def test_XSS_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS.py > User input at line 6, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_flask.request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS.py > Line 6: param = ~call_1 @@ -186,7 +186,7 @@ def test_path_traversal_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/path_traversal.py > User input at line 15, source "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + ~call_1 = ret_flask.request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/path_traversal.py > Line 15: image_name = ~call_1 @@ -210,7 +210,7 @@ def test_path_traversal_result(self): > Line 19: foo = ~call_2 File: examples/vulnerable_code/path_traversal.py > reaches line 20, sink "send_file(": - ~call_4 = ret_send_file(foo) + ~call_4 = ret_flask.send_file(foo) 
""" self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) @@ -222,7 +222,7 @@ def test_ensure_saved_scope(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/ensure_saved_scope.py > User input at line 15, source "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + ~call_1 = ret_flask.request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/ensure_saved_scope.py > Line 15: image_name = ~call_1 @@ -232,7 +232,7 @@ def test_ensure_saved_scope(self): > Line 10: save_3_image_name = image_name File: examples/vulnerable_code/ensure_saved_scope.py > reaches line 20, sink "send_file(": - ~call_4 = ret_send_file(image_name) + ~call_4 = ret_flask.send_file(image_name) """ self.assertAlphaEqual( vulnerability_description, @@ -246,7 +246,7 @@ def test_path_traversal_sanitised_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/path_traversal_sanitised.py > User input at line 8, source "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + ~call_1 = ret_flask.request.args.get('image_name') Reassigned in: File: examples/vulnerable_code/path_traversal_sanitised.py > Line 8: image_name = ~call_1 @@ -258,7 +258,7 @@ def test_path_traversal_sanitised_result(self): > Line 12: ~call_4 = ret_os.path.join(~call_5, image_name) File: examples/vulnerable_code/path_traversal_sanitised.py > reaches line 12, sink "send_file(": - ~call_3 = ret_send_file(~call_4) + ~call_3 = ret_flask.send_file(~call_4) This vulnerability is sanitised by: Label: ~call_2 = ret_image_name.replace('..', '') """ @@ -271,7 +271,7 @@ def test_path_traversal_sanitised_2_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/path_traversal_sanitised_2.py > User input at line 8, source "request.args.get(": - ~call_1 = ret_request.args.get('image_name') + ~call_1 = ret_flask.request.args.get('image_name') Reassigned in: File: 
examples/vulnerable_code/path_traversal_sanitised_2.py > Line 8: image_name = ~call_1 @@ -279,7 +279,7 @@ def test_path_traversal_sanitised_2_result(self): > Line 12: ~call_3 = ret_os.path.join(~call_4, image_name) File: examples/vulnerable_code/path_traversal_sanitised_2.py > reaches line 12, sink "send_file(": - ~call_2 = ret_send_file(~call_3) + ~call_2 = ret_flask.send_file(~call_3) This vulnerability is potentially sanitised by: Label: if '..' in image_name: """ @@ -292,7 +292,7 @@ def test_sql_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/sql/sqli.py > User input at line 26, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_flask.request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/sql/sqli.py > Line 26: param = ~call_1 @@ -347,7 +347,7 @@ def test_XSS_reassign_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_reassign.py > User input at line 6, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_flask.request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_reassign.py > Line 6: param = ~call_1 @@ -367,18 +367,18 @@ def test_XSS_sanitised_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_sanitised.py > User input at line 7, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_flask.request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_sanitised.py > Line 7: param = ~call_1 File: examples/vulnerable_code/XSS_sanitised.py - > Line 9: ~call_2 = ret_Markup.escape(param) + > Line 9: ~call_2 = ret_flask.Markup.escape(param) File: examples/vulnerable_code/XSS_sanitised.py > Line 9: param = ~call_2 File: examples/vulnerable_code/XSS_sanitised.py > reaches line 12, sink "replace(": ~call_5 = ret_html.replace('{{ param }}', 
param) - This vulnerability is sanitised by: Label: ~call_2 = ret_Markup.escape(param) + This vulnerability is sanitised by: Label: ~call_2 = ret_flask.Markup.escape(param) """ self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) @@ -394,7 +394,7 @@ def test_XSS_variable_assign_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_variable_assign.py > User input at line 6, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_flask.request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_variable_assign.py > Line 6: param = ~call_1 @@ -414,7 +414,7 @@ def test_XSS_variable_multiple_assign_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: examples/vulnerable_code/XSS_variable_multiple_assign.py > User input at line 6, source "request.args.get(": - ~call_1 = ret_request.args.get('param', 'not set') + ~call_1 = ret_flask.request.args.get('param', 'not set') Reassigned in: File: examples/vulnerable_code/XSS_variable_multiple_assign.py > Line 6: param = ~call_1 @@ -483,19 +483,19 @@ def test_list_append_taints_list(self): self.assert_length(vulnerabilities, expected_length=1) def test_import_bb_or_bi_with_alias(self): - self.cfg_create_from_file('examples/vulnerable_code/command_injection_with_aliases.py') - - EXPECTED = ['Entry module', - "~call_1 = ret_os.system('ls')", - "~call_2 = ret_os.system('ls')", - "~call_3 = ret_os.system('ls')", - "~call_4 = ret_os.system('ls')", - "~call_5 = ret_subprocess.call('ls')", - "~call_6 = ret_subprocess.Popen('ls')", - 'Exit module' + vulnerabilities = self.run_analysis('examples/vulnerable_code/command_injection_with_aliases.py') + + EXPECTED_SINK_TRIGGER_WORDS = [ + 'os.system(', + 'os.system(', + 'os.system(', + 'os.system(', + 'subprocess.call(', + 'subprocess.Popen(' ] - for node, expected_label in zip(self.cfg.nodes, EXPECTED): - self.assertEqual(node.label, expected_label) + 
+ for vuln, expected_sink_trigger_word in zip(vulnerabilities, EXPECTED_SINK_TRIGGER_WORDS): + self.assertEqual(vuln.sink_trigger_word, expected_sink_trigger_word) class EngineDjangoTest(VulnerabilitiesBaseTestCase): @@ -531,7 +531,7 @@ def test_django_view_param(self): param File: examples/vulnerable_code/django_XSS.py > reaches line 5, sink "render(": - ~call_1 = ret_render(request, 'templates/xss.html', 'param'param) + ~call_1 = ret_django.shortcuts.render(request, 'templates/xss.html', 'param'param) """ self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION) From 61f0408574685f6fb2a1ffd4c39a49964056aa54 Mon Sep 17 00:00:00 2001 From: wchresta <34962284+wchresta@users.noreply.github.com> Date: Sat, 23 Mar 2019 16:45:29 -0400 Subject: [PATCH 286/291] Remove blank line at end of file. --- pyt/cfg/alias_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyt/cfg/alias_helper.py b/pyt/cfg/alias_helper.py index 9de6d058..9d780992 100644 --- a/pyt/cfg/alias_helper.py +++ b/pyt/cfg/alias_helper.py @@ -92,4 +92,3 @@ def fully_qualify_alias_labels(label, aliases): if label.startswith(alias+'.'): return full_name + label[len(alias):] return label - From a7bb0b275ffc78c4cc168ae529e5adb7467979cd Mon Sep 17 00:00:00 2001 From: wchresta <34962284+wchresta@users.noreply.github.com> Date: Sat, 23 Mar 2019 18:13:50 -0400 Subject: [PATCH 287/291] Implement suggestions from code-review. 
--- pyt/cfg/alias_helper.py | 2 +- pyt/cfg/stmt_visitor.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pyt/cfg/alias_helper.py b/pyt/cfg/alias_helper.py index 9d780992..920af3f5 100644 --- a/pyt/cfg/alias_helper.py +++ b/pyt/cfg/alias_helper.py @@ -89,6 +89,6 @@ def fully_qualify_alias_labels(label, aliases): for alias, full_name in aliases.items(): if label == alias: return full_name - if label.startswith(alias+'.'): + elif label.startswith(alias+'.'): return full_name + label[len(alias):] return label diff --git a/pyt/cfg/stmt_visitor.py b/pyt/cfg/stmt_visitor.py index 0a35c566..16da1bb0 100644 --- a/pyt/cfg/stmt_visitor.py +++ b/pyt/cfg/stmt_visitor.py @@ -6,11 +6,11 @@ from .alias_helper import ( as_alias_handler, + fully_qualify_alias_labels, handle_aliases_in_init_files, handle_fdid_aliases, not_as_alias_handler, - retrieve_import_alias_mapping, - fully_qualify_alias_labels + retrieve_import_alias_mapping ) from ..core.ast_helper import ( generate_ast, @@ -816,6 +816,7 @@ def add_module( # noqa: C901 module_path = module[1] parent_definitions = self.module_definitions_stack[-1] + # Here, in `visit_Import` and in `visit_ImportFrom` are the only places the `import_alias_mapping` is updated parent_definitions.import_alias_mapping.update(import_alias_mapping) parent_definitions.import_names = local_names @@ -1106,7 +1107,7 @@ def visit_ImportFrom(self, node): from_from=True ) - # Remember aliases for uninspecatble modules such that we can label them fully qualified + # Remember aliases for uninspectable modules such that we can label them fully qualified # e.g. 
we want a call to "os.system" be recognised, even if we do "from os import system" # from os import system as mysystem -> module=os, name=system, asname=mysystem for name in node.names: From 9d2a607001e038bda86bf2aa4f0d7fde5751d274 Mon Sep 17 00:00:00 2001 From: wchresta <34962284+wchresta@users.noreply.github.com> Date: Sat, 23 Mar 2019 18:37:20 -0400 Subject: [PATCH 288/291] Add fully qualified shell injection sinks. --- .../all_trigger_words.pyt | 47 +++++++++++++++---- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/pyt/vulnerability_definitions/all_trigger_words.pyt b/pyt/vulnerability_definitions/all_trigger_words.pyt index 615e86b6..edd4649c 100644 --- a/pyt/vulnerability_definitions/all_trigger_words.pyt +++ b/pyt/vulnerability_definitions/all_trigger_words.pyt @@ -30,18 +30,49 @@ "'..' in" ] }, + "commands.getoutput(": {}, + "commands.getstatusoutput(": {}, "execute(": {}, - "os.system(": {}, "filter(": {}, - "subprocess.call(": {}, - "subprocess.Popen(": {}, - "render_template(": {}, - "set_cookie(": {}, - "redirect(": {}, - "url_for(": {}, "flash(": {}, "jsonify(": {}, + "os.execl(": {}, + "os.execle(": {}, + "os.execlp(": {}, + "os.execlpe(": {}, + "os.execv(": {}, + "os.execve(": {}, + "os.execvp(": {}, + "os.execvpe(": {}, + "os.popen(": {}, + "os.popen2(": {}, + "os.popen3(": {}, + "os.popen4(": {}, + "os.spawnl(": {}, + "os.spawnle(": {}, + "os.spawnlp(": {}, + "os.spawnlpe(": {}, + "os.spawnv(": {}, + "os.spawnve(": {}, + "os.spawnvp(": {}, + "os.spawnvpe(": {}, + "os.startfile(": {}, + "os.system(": {}, + "popen2.Popen3(": {}, + "popen2.Popen4(": {}, + "popen2.popen2(": {}, + "popen2.popen3(": {}, + "popen2.popen4(": {}, + "redirect(": {}, "render(": {}, - "render_to_response(": {} + "render_template(": {}, + "render_to_response(": {}, + "set_cookie(": {}, + "subprocess.Popen(": {}, + "subprocess.call(": {}, + "subprocess.check_call(": {}, + "subprocess.check_output(": {}, + "subprocess.run(": {}, + "url_for(": {} } } From 
e9dc6839d2dd8a126543936cfe6cbca8fdca6beb Mon Sep 17 00:00:00 2001 From: Berkeley Churchill Date: Mon, 12 Aug 2019 21:56:39 -0700 Subject: [PATCH 289/291] Update README.rst Warn users to use python3.6 or 3.7 --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 32dba718..b0240895 100644 --- a/README.rst +++ b/README.rst @@ -63,6 +63,8 @@ Example usage and output: Install ======= +Before continuing, make sure you have python3.6 or 3.7 installed. + .. code-block:: python pip install python-taint From b51dd7f35e71b1c561afa8d0735390ef1693cf6d Mon Sep 17 00:00:00 2001 From: Berkeley Churchill Date: Tue, 13 Aug 2019 07:04:56 +0000 Subject: [PATCH 290/291] adding eval, exec sinks --- pyt/vulnerability_definitions/all_trigger_words.pyt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyt/vulnerability_definitions/all_trigger_words.pyt b/pyt/vulnerability_definitions/all_trigger_words.pyt index edd4649c..7d7d2999 100644 --- a/pyt/vulnerability_definitions/all_trigger_words.pyt +++ b/pyt/vulnerability_definitions/all_trigger_words.pyt @@ -32,6 +32,8 @@ }, "commands.getoutput(": {}, "commands.getstatusoutput(": {}, + "eval(": {}, + "exec(": {}, "execute(": {}, "filter(": {}, "flash(": {}, From f4ec9e127497a7ba7d08d68e8fca8b2f06756679 Mon Sep 17 00:00:00 2001 From: KevinHock Date: Sun, 8 Mar 2020 14:43:06 -0700 Subject: [PATCH 291/291] :mortar_board: Add mention of Pysa tutorial https://github.com/facebook/pyre-check/tree/master/pysa_tutorial#pysa-tutorial --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index b0240895..815b430b 100644 --- a/README.rst +++ b/README.rst @@ -22,6 +22,8 @@ This project is no longer maintained ==================================== +**March 2020 Update**: Please go see the amazing `Pysa tutorial`_ that should get you up to speed finding security vulnerabilities in your Python codebase. 
+ `Pyre`_ from Facebook is an amazing project that has a bright future and many smart people working on it. I would suggest, if you don't know that much about program analysis, that you understand how PyT works before diving into Pyre. Along with the `README's in most directories`_, there are the original `Master's Thesis`_ and `some slides`_. With that said, **I am happy to review pull requests and give you write permissions if you make more than a few.** @@ -30,6 +32,7 @@ There were a lot of great contributors to this project, I plan on working on oth If you are a security engineer with e.g. a Python codebase without type annotations, that Pyre won't handle, I would suggest you replace your sinks with a secure wrapper (something like `defusedxml`_), and alert off any uses of the standard sink. You can use `Bandit`_ to do this since dataflow analysis is not required, but you will have to trim it a lot, due to the high false-positive rate. +.. _Pysa tutorial: https://github.com/facebook/pyre-check/tree/master/pysa_tutorial#pysa-tutorial .. _Pyre: https://github.com/facebook/pyre-check .. _README's in most directories: https://github.com/python-security/pyt/tree/master/pyt#how-it-works .. _Master's Thesis: https://projekter.aau.dk/projekter/files/239563289/final.pdf