diff --git a/.directory b/.directory new file mode 100644 index 00000000..cb15c4c3 --- /dev/null +++ b/.directory @@ -0,0 +1,2 @@ +[Desktop Entry] +Icon=/home/vainlystrain/Pictures/VailynIcon111v.png diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..f0cbedd3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,35 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Actual behavior** +A clear and concise description of what happened instead. If applicable, paste logs & error output here. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Specs (please complete the following information):** + - OS: [e.g. Kali Linux] + - Python Version [e.g. 3.8.6] + - TIDoS Version [e.g. 2.0] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..e301d68c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: feature request +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..573fe6ae --- /dev/null +++ b/.travis.yml @@ -0,0 +1,24 @@ +group: travis_latest +language: python +cache: pip +matrix: + include: + - python: 3.7 + dist: xenial # required for Python 3.7 (travis-ci/travis-ci#9069) + sudo: required # required for Python 3.7 (travis-ci/travis-ci#9069) + - python: 3.8 + dist: xenial # required for Python 3.7 (travis-ci/travis-ci#9069) + sudo: required # required for Python 3.7 (travis-ci/travis-ci#9069) +install: + # - pip install -r requirements.txt + - pip install flake8 +before_script: + # stop the build if there are Python syntax errors or undefined names + - flake8 . --count --exclude=./core/lib --select=E901,E999,F821,F822,F823 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + - flake8 . 
--count --exclude=./core/lib --exit-zero --max-complexity=10 --max-line-length=127 --statistics --quiet +script: + - true # add other tests here +notifications: + on_success: change + on_failure: change # `always` will be the setting once code changes slow down diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 8ceceaa1..00000000 --- a/CHANGELOG +++ /dev/null @@ -1,165 +0,0 @@ - - =============================== - T I D o S F R A M E W O R K - =============================== - - ============ - Changelog: - ============ - - * v0.1.0 - - Just an init to this repository. A long way to go. - - Changelog: - This release features: - - - fl00d - A UDP Flooder (basic). - - gsearch - A google searching facility via console. - - info - A module that displays info about your network. - - webex - A module for determining website status. - - * v0.1.1 - - Small improvements in code. Added a new module. - - Changelog: - This release features a new module. - - - piweb - For pinging websites. - - Separate bug fixes. - - * v0.1.2 - - Added some serious bug fixes occurring at setup.py. That was real quick. - - Changelog: - - This feature includes just some small bug fixes. - - Addition of setup.py file. - - * v1.0.0 - - A comprehensive penetration testing toolkit framework ready. Now on the way to be the best framework ever released. - - Changelog: - - The biggest release till now. - - Total modules now comprise of 73 modules. - - Complete change of interface for easy interactions. - - This is something real. *fire* - - * v1.1.0 - - Small minor additions to the framework. - - Changelog: - - Huge bug fixes. - - Addition of CRLF to VulnLysis - - Proper configuration of the payload database. - - * v1.1.1 - - Addition of modules + bug fixes. - - Changelog: - - Added 3 more brutemods_ modules. - - Bug fix to the main modules. - - Brute database updated with more default protocol changes. - - Addition of pre-config. database. - - * v1.2.0 - - Huge bug fixes + Utility Modules Additions. - - Changelog: - - Bug fixes (huge). - - Added 4 modules to _brutemods_. - - Updates to the payload database + fuzz database. - - Improvements to the cloudflare misconfig. module. - - Operating System Detection Module improved. - - * v1.2.1 - - Major code changes. Improvements to the ActiveRecon modules. (thanks to CodeBy Forum). - - Changlog: - - Added port scan support for OS fingerprinting module. - - Replaced all recursion string collisons. big bug fix - - Added support for minor updates via git pull. - - Added fix to the core end modules. - - * v1.2.2 - - Improvements to the core features. Bug fixes. - - Changelog: - - Added Exception Handling feature globally. - - Improved the crlf module. - - Fixed erring code at Host Header Injection module. - - Updated the payload database with new payloads. - - * v1.3.0 - - Additions to OSINT modules. Improvements to other parts of `ActiveRecon`. - - Changelog: - - Addition and improvements to `ActiveRecon` modules. - - New module added `Web Technology Enumeration`. - - 3 new modules added to `PassiveRecon` modules: - - Enumeration via Google Groups. - - PasteBin Posts Gatherer. - - LinkedIn Gathering. - - * v1.3.1 - - Addition of the new modules + bug fixes. - - Changelog: - - New module added LDAP injection. - - Improvements to the updater module. - - Addition of ldap_payloads at the payload-db. - - Other minor bug fixes. - - * v1.4.0 - - Big release with lots of changes. - - Changelog: - - Addition of 4 major modules under ScanEnum and VulnLysis. 
- - Addition of HTML Payloads to the database. - - Support for API keys globally. - - Major ImportError bug fixes. - - Re-written 6 major modules for efficiency. - - * v1.5.0 - - Wholesome release with 8 new modules. - - Changelog: - - Addition of 5 new modules under ActiveRecon Phase. - - Addition of 3 new modules under Vulnerability Analysis. - - Bug fixes to the BruteMods module. - - Most of PassiveRecon Phase modules rewritten for the better. - - Lastly, some minor bug fixes and stuff. - - * v1.5.1 - - Small minor release with 5 new modules. - - Changelog: - - Separated the auxillaries from mainstream phase. - - Addition of 2 new modules to auxillaries. - - Addition of 3 new modules under OSINT. - - Minor bug fixes. - - Code optimised for better threading. - - Removal of lots of unnecessary code. - - * v1.5.2 [latest release] (#stable) - - Addition of 2 new modules and picks to mainstream framework. - - Changelog: - - TIDoS Framework now boasts of a century of modules. - - A new module added under Passive Recon. - - Full Contact Module saw its final touch. - - Another new module added under Auxillaries. - - Few bug fixes and stuff. diff --git a/DISCLAIMER b/DISCLAIMER deleted file mode 100644 index e3186b45..00000000 --- a/DISCLAIMER +++ /dev/null @@ -1,16 +0,0 @@ - - ===================================================== - T I D O S F R A M E W O R K v1.5 - ===================================================== - - TIDoS is a open-source tool developed as a royalty-free website penetration testing tool. - - I was developed by Pinaxx Robinson, known by the name @_tID_ aka The Infected Drake of Team CodeSploit. - - This is to make you note that I was purely developed for Penetration Testing purposes. The developer is not responsible for any damage or data loss due to my misuse. If you intend to use me for any malicious purposes, use it at your own risk. ;) - - Also by using this tool, you agree to be a awesome person. Try to help others, strive not being a scriptkid, and do not pollute the community with unwanted stuff. - - You can edit these scripts within me as per your own needs, provided you use it only for yourself. If u wanna publish me again in a reformed appearance, give the developer some credits... :) - - diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f288702d --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. 
Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/README.md b/README.md index 324affbf..50297bd8 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,159 @@ -

- -![Python](https://img.shields.io/badge/Python-2.7-green.svg) ![TIDoS](https://img.shields.io/badge/TIDoS-v1.5.2-blue.svg) ![Supported OS](https://img.shields.io/badge/Supported%20OS-Linux-yellow.svg) ![License](https://img.shields.io/badge/License-GPLv3-orange.svg) ![Build](https://img.shields.io/badge/Build-0088-red.svg) ![Modules](https://img.shields.io/badge/Modules-102-blue.svg) ![Status](https://img.shields.io/badge/Build%20Status-passing-brightgreen.svg) ![Stage](https://img.shields.io/badge/Release-Stable-green.svg) - -# The TIDoS Framework -TIDoS Framework is a comprehensive web-app audit framework. `let's keep this simple` +

+[README header: centered TIDoS logo and badge images stripped]
+TIDoS
+The Offensive Web Application Penetration Testing Framework.

+
+> __IMPORTANT__:
+>
+> The new Qt5 interface is complete, but has additional dependencies. Take a look at the updated installation instructions.

### Highlights :-

-The main highlights of this framework is:
-- [x] TIDoS Framework now boasts of a century of modules.
-- [x] A complete versatile framework to cover up everything from Reconnaissance to Vulnerability Analysis.
-- [x] Has 5 main phases, subdivided into __14 sub-phases__ consisting a total of __102 modules__.
-- [x] Reconnaissance Phase has 47 modules of its own (including active and passive recon, information disclosure modules).
-- [x] Scanning & Enumeration Phase has got 15 modules (including port scans, WAF analysis, etc)
-- [x] Vulnerability Analysis Phase has 35 modules (including most common vulnerabilites in action).
-- [x] Exploits Castle has only 1 exploit. `(purely developmental)`
-- [x] And finally, Auxillaries have got 4 modules. `under dev.`
-- [x] All four phases each have a `Auto-Awesome` module which automates every module for you.
-- [x] You just need the domain, and leave everything is to this tool.
-- [x] TIDoS has full verbose out support, so you'll know whats going on.
-- [x] Fully user friendly interaction environment. `(no shits)`
-
-
+Here is some light on what the framework is all about:
+- A complete, versatile framework covering everything from Reconnaissance to Vulnerability Analysis.
+- Has 5 main phases, subdivided into __14 sub-phases__ consisting of a total of __108 modules__.
+- Reconnaissance Phase has 50 modules of its own (including active and passive recon, information disclosure modules).
+- Scanning & Enumeration Phase has 16 modules (including port scans, WAF analysis, etc.)
+- Vulnerability Analysis Phase has 37 modules (including most common vulnerabilities in action).
+- Exploits Castle has only 1 exploit. `(purely developmental)`
+- And finally, Auxiliaries have 4 modules. `more under development`
+- All four phases each have an `Auto-Awesome` module which automates every module for you.
+- Huge performance boost through multiprocessing.
+- Piping attacks through Tor (not implemented everywhere yet).
+- You just need the domain; leave everything else to this tool.
+- TIDoS has full verbose output support, so you'll know what's going on.
+- Attacking is now even easier with the new GUI.
+
+### Main new features
+- The programming language: TIDoS is fully ported to Python 3.
+- The interface: TIDoS presents a new, Metasploit-like console interface.
+- Parallelisation: TIDoS uses multiprocessing to speed up attacks.
+- An alternative CLI interface for faster interaction with one specific module.
+- Anonymity: attacking through Tor is possible (95% done).
+- Module completion: some modules have been feature-extended (e.g. more evasion, support for more than one query parameter).
+- Some new modules: arpscan.
+- A Graphical User Interface for easier interaction with the toolkit.
+- Supports non-default http(s) ports.
+
+### Upcoming
+- Results of modules will be stored in a database.
+- New modules: nikto & photon.

### Installation :-

+#### Installation Script (Globally) :
+
+To install the framework globally in /opt, run the provided `core/install.py` script as root. After this, you can launch TIDoS simply by typing `tidos` on the command line.
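+
+For reference, a minimal sketch of the global install flow. The direct `python3 core/install.py` invocation is an assumption for illustration; the script itself is the authoritative reference:
+
+```
+git clone https://github.com/0xinfection/tidos-framework.git
+cd tidos-framework
+sudo python3 core/install.py    # assumed invocation; installs TIDoS under /opt
+tidos                           # afterwards, launch from anywhere
+```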
+
+#### Manual Installation (Locally) :
+
 * Clone the repository locally and navigate there:
 ```
-git clone https://github.com/theinfecteddrake/tidos-framework.git
+git clone https://github.com/0xinfection/tidos-framework.git
 cd tidos-framework
 ```
-* Install the dependencies:
+
+TIDoS needs some libraries to run, which can be installed via the `apt` or `dnf` package managers.
+```
+sudo apt-get install libncurses5 libxml2 nmap tcpdump libexiv2-dev build-essential python3-pip libmariadbclient18 libmysqlclient-dev tor konsole
+```
+Once these dependencies have finished installing, install the remaining Python package dependencies by running:
+```
+pip3 install -r requirements.txt
+```
+
+TIDoS uses Vailyn to scan for path traversals in a new, improved path traversal module. If you want to use that module, head to `https://github.com/VainlyStrain/Vailyn` and follow the installation instructions there.
+
+That's it. You now have TIDoS at your service. Fire it up using:
+```
+python3 tidv2 #Qt5 interface
+sudo python3 tidconsole.py #console interface
+```
+
+#### Docker image :
+
+You can build it from the Dockerfile:
 ```
-chmod +x install
-./install
+git clone https://github.com/0xinfection/tidos-framework.git
+cd tidos-framework/core/docker
+docker build -t tidos .
 ```
-
-Thats it! Now you are good to go! Now lets run the tool:
+To run TIDoS :
+
 ```
+docker run --interactive --tty --rm tidos bash
 tidos
 ```
-### Getting Started :-
+Update: TIDoS is now available on Docker Hub. Install and run the container like this:
-TIDoS is made to be comprehensive and versatile. It is a highly flexible framework where you just have to select and use modules.
+```
+docker run -it vainlystrain/tidos-framework
+```
-But before that, you need to set your own `API KEYS` for various OSINT purposes. To do so, open up `API_KEYS.py` under `files/` directory and set your own keys and access tokens for `SHODAN`, `CENSYS`, `FULL CONTACT`, `GOOGLE` and `WHATCMS`. Public `API KEYS` and `ACCESS TOKENS` for `SHODAN` and `WHATCMS` have been provided with the TIDoS release itself. You can still add your own... `no harm!`
+#### Updating TIDoS :
-Finally, as the framework opens up, enter the website name `eg. http://www.example.com` and let TIDoS lead you. Thats it! Its as easy as that.
+To update TIDoS to the current version, move into the installation folder and perform a `(sudo) git pull` (sudo if TIDoS was installed via `install.py`). Alternatively, you can run the `fetch` command in tidconsole.
-> Recommended:
-> - Follow the order of the tool (Run in a schematic way).
+
+### Getting Started :-
+
+To get started, you need to set your own `API KEYS` for the various OSINT and Scanning & Enumeration modules. To do so, open up `API_KEYS.py` under the `files/` directory and set your own keys and access tokens for `SHODAN`, `CENSYS`, `FULL CONTACT`, `GOOGLE` and `WHATCMS`.
+
+> __GOOD NEWS__:
>
-> `Reconnaissance ➣ Scanning & Enumeration ➣ Vulnerability Analysis`
+> The latest release of TIDoS includes all API KEYS and ACCESS TOKENS for `SHODAN`, `CENSYS`, `FULL CONTACT`, `GOOGLE` and `WHATCMS` by default. I found these tokens on various repositories on GitHub itself. __You can now use all the modules__ which use the API KEYS. :)
-To update this tool, use `tidos_updater.py` module under `tools/` folder.
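+
+As a rough illustration, `API_KEYS.py` can be thought of as a plain Python module of string constants, one per service. The variable names below are hypothetical placeholders, not necessarily the ones shipped in `files/API_KEYS.py`:
+
+```
+#!/usr/bin/env python3
+# Sketch of files/API_KEYS.py -- variable names are hypothetical placeholders.
+
+shodan_api_key = "YOUR-SHODAN-API-KEY"
+censys_api_id = "YOUR-CENSYS-API-ID"
+censys_api_secret = "YOUR-CENSYS-API-SECRET"
+fullcontact_api_key = "YOUR-FULL-CONTACT-API-KEY"
+google_api_key = "YOUR-GOOGLE-API-KEY"
+whatcms_api_key = "YOUR-WHATCMS-API-KEY"
+```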
+#### Commands :-
+
+```
+ __                                                             __
+   !   attack      Attack specified target(s)                    M
+   :   clear       Clear terminal.                               :
+   V   creds       Handle target credentials.
+   :   fetch       Check for and install updates.                :
+   :   find        Search a module.                              :
+       help        Show help message.                            :
+   V   info        Show description of current module.           M
+   :   intro       Display Intro.                                :
+   :   leave       Leave module.                                 M
+       list        List all modules of a category.               :
+   :   load        Load module.                                  :
+   :   netinfo     Show network information.                     :
+   :   opts        Show options of current module.               M
+       phpsploit   Load the phpsploit framework.                 :
+                   (needs to be downloaded externally)
+   :   processes   Set number of processes in parallelism.       :
+       q           Terminate TIDoS session.                      :
+   :   sessions    Interact with cached sessions.                :
+   :   set         Set option value of module.                   M
+   :   tor         Pipe Attacks through the Tor Network.         :
+       vicadd      Add Target to list.                           :
+       vicdel      Delete Target from list.                      :
+       viclist     List all targets.                             :
+
+   Avail. Cmds
+   M   needs loaded module
+   V   [! potentially] need loaded target(s)
+```

### Flawless Features :-

-TIDoS Framework presently supports the following: `and is under active development`
+TIDoS presently supports the following: `and more modules are under active development`

* __Reconnaissance + OSINT__
-  + Passive Reconnaissance:
+  + __Passive Reconnaissance:__
    - Nping Enumeration `Via external API`
    - WhoIS Lookup `Domain info gathering`
    - GeoIP Lookup `Pinpoint physical location`
@@ -87,7 +178,7 @@ TIDoS Framework presently supports the following: `and is under active developme
    - Censys Intel Gathering `Domain Based`
    - Threat Intelligence Gathering `Bad IPs`
-  + Active Reconnaissance
+  + __Active Reconnaissance:__
    - Ping Enumeration `Advanced`
    - CMS Detection `(185+ CMSs supported)` `IMPROVED`
    - Advanced Traceroute `IMPROVED`
@@ -98,6 +189,7 @@ TIDoS Framework presently supports the following: `and is under active developme
    - Examine SSL Certificate `Absolute`
    - Apache Status Disclosure Checks `File Based`
    - WebDAV HTTP Enumeration `PROPFIND & SEARCH`
+   - PHPInfo File Enumeration `via Bruteforce`
    - Comments Scraper `Regex Based`
    - Find Shared DNS Hosts `Name Server Based`
    - Alternate Sites Discovery `User-Agent Based`
@@ -106,9 +198,11 @@ TIDoS Framework presently supports the following: `and is under active developme
    - Common Backup Locations `.bak, .db, etc.`
    - Common Password Locations `.pgp, .skr, etc.`
    - Common Proxy Path Configs. 
`.pac, etc.` + - Multiple Index Paths `index, index1, etc.` - Common Dot Files `.htaccess, .apache, etc` + - Common Logfile Locations `.log, .changelog, etc` - + Information Disclosure + + __Information Disclosure:__ - Credit Cards Disclosure `If Plaintext` - Email Harvester `IMPROVED` - Fatal Errors Enumeration `Includes Full Path Disclosure` @@ -124,19 +218,21 @@ TIDoS Framework presently supports the following: `and is under active developme - TCP SYN Scan `Highly reliable` - TCP Connect Scan `Highly Reliable` - XMAS Flag Scan `Reliable Only in LANs` - - Fin Flag Scan `Reliable Only in LANs` + - FIN Flag Scan `Reliable Only in LANs` - Port Service Detector + Web Technology Enumeration `Absolute` + + Complete SSL Enumeration `Absolute` + Operating System Fingerprinting `IMPROVED` + Banner Grabbing of Services `via Open Ports` + Interactive Scanning with NMap `16 preloaded modules` - + Enumeration Domain-Linked IPs `Using CENSYS Database` - + Crawlers + + Internet Wide Servers Scan `Using CENSYS Database` + + Web and Links Crawlers - Depth 1 `Indexed Uri Crawler` - Depth 2 `Single Page Crawler` - Depth 3 `Web Link Crawler` + + ARP Scanner `NEW` -+ __Vulnerability Analysis__ +* __Vulnerability Analysis__ __Web-Bugs & Server Misconfigurations__ @@ -148,7 +244,7 @@ TIDoS Framework presently supports the following: `and is under active developme - `X-FRAME-OPTIONS` Header Checks + Security on Cookies - `HTTPOnly` Flag - - `Secure` Flag + - `Secure` Flag on Cookies + Cloudflare Misconfiguration Check - DNS Misconfiguration Checks - Online Database Lookup `For Breaches` @@ -158,7 +254,7 @@ TIDoS Framework presently supports the following: `and is under active developme - Missing `SPF` Records - Missing `DMARC` Records + Host Header Injection - - Port Based `Over HTTP 80` + - Port Based `Web Socket Based` - `X-Forwarded-For` Header Injection + Security Headers Analysis `Live Capture` + Cross-Site Tracing `HTTP TRACE Method` @@ -174,7 +270,7 @@ TIDoS Framework presently supports the following: `and is under active developme - Parameter Based - Pre-loaded Path Based + OS Command Injection `Linux & Windows (RCE)` - + Path Traversal `(Sensitive Paths)` + + Path Traversal `ENHANCED` + Cross-Site Request Forgery `Absolute` + SQL Injection + Error Based Injection @@ -189,15 +285,17 @@ TIDoS Framework presently supports the following: `and is under active developme - Auto-gathering `IMPROVED` + LDAP Injection `Parameter Based` + HTML Injection `Parameter Based` - + Bash Command Injection `ShellShock` + + Bash Command Injection `ShellShock` + + Apache Struts Shock `Apache RCE` + + XPATH Injection `Parameter Based` + Cross-Site Scripting `IMPROVED` - Cookie Value Based - Referer Value Based - User-Agent Value Based - Parameter Value Based `Manual` + Unvalidated URL Forwards `Open Redirect` - + PHP Code Injection `Windows + Linux` - + HTTP Response Splitting `CRLF Injection` + + PHP Code Injection `Windows + Linux RCE` + + CRLF Injection `HTTP Response Splitting` - User-Agent Value Based - Parameter value Based `Manual` + Sub-domain Takeover `50+ Services` @@ -212,7 +310,7 @@ TIDoS Framework presently supports the following: `and is under active developme - SSH Protocol Bruteforce - POP 2/3 Protocol Bruteforce - SQL Protocol Bruteforce - - XMPP Protocol Bruteforce + - (XMPP Protocol Bruteforce) `BROKEN:DEP` - SMTP Protocol Bruteforce - TELNET Protocol Bruteforce @@ -228,47 +326,22 @@ TIDoS Framework presently supports the following: `and is under active developme + ShellShock ### Other Tools: -- 
`net_info.py` - Displays information about your network. Located under `tools/`. -- `tidos_updater.py` - Updates the framework to the latest release via signature matching. Located under `tools/'. +- `net_info.py` - Displays information about your network. Accessible via the 'netinfo' command. ### TIDoS In Action: - - - - +Let's see a demonstration of TIDoS in action: - - - +[![asciicast](https://asciinema.org/a/359477.svg)](https://asciinema.org/a/359477) ### Version: ``` -v1.5.2 [latest release] [#stable] +v2.0.1-5 [latest release] [#beta] ``` -### Upcoming: - -There are some bruteforce modules to be added: -- Some more of Enumeraton & Information Disclosure modules. -- Lots more of OSINT & Stuff (let that be a suspense). -- More of Auxillary Modules. -- Some Exploits are too being worked on. - -### Known Bugs: - -This version of TIDoS is purely developmental and is presently `stable`. There are bugs in resolving the `[99] Back` at various end-points which results in blind fall-backs. Though I have added global exception handling, still, there maybe bugs out there. Also TIDoS needs to develop more on logging all info displayed on the screen `(help needed)`. - ### Disclaimer: -TIDoS is provided as a offensive web application audit framework. It has built-in modules which can reveal potential misconfigurations and vulnerabilties in web applications which could possibly be exploited maliciously. - -__THEREFORE, I AM NOT EXCLUSIVELY RESPONSIBLE FOR ANY MISUSE OF THIS TOOLKIT.__ +TIDoS is provided as an offensive web application audit framework. It has built-in modules which can reveal potential misconfigurations and vulnerabilities in web applications which could possibly be exploited maliciously. -### Final Words: +__THEREFORE, NEITHER THE AUTHOR NOR THE CONTRIBUTORS ARE RESPONSIBLE FOR ANY MISUSE OR DAMAGE DUE TO THIS TOOLKIT.__ -This project is presently under active development so you may want to put it on a watch, since it is updated frequently `(you can take a look at past commits history)`. This project is one of the best frameworks I have ever built and I would really like your constructive criticism, suggestions and help in converting this project into the best web penetration testing framework ever built `and trust me, it will be ;)`.
- -> Thank you, -> -> @_tID | CodeSploit diff --git a/core/.directory b/core/.directory new file mode 100644 index 00000000..acdd027f --- /dev/null +++ b/core/.directory @@ -0,0 +1,2 @@ +[Desktop Entry] +Icon=/home/vainlystrain/Pictures/vaileava11.png diff --git a/core/Core/__init__.py b/core/Core/__init__.py new file mode 100644 index 00000000..8d1c8b69 --- /dev/null +++ b/core/Core/__init__.py @@ -0,0 +1 @@ + diff --git a/core/Core/__pycache__/__init__.cpython-37.pyc b/core/Core/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 00000000..646a3499 Binary files /dev/null and b/core/Core/__pycache__/__init__.cpython-37.pyc differ diff --git a/core/Core/__pycache__/colors.cpython-37.pyc b/core/Core/__pycache__/colors.cpython-37.pyc new file mode 100644 index 00000000..f4f04f36 Binary files /dev/null and b/core/Core/__pycache__/colors.cpython-37.pyc differ diff --git a/core/Core/colors.py b/core/Core/colors.py new file mode 100644 index 00000000..1a278a6f --- /dev/null +++ b/core/Core/colors.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +#-:-:-:-:-:-:-:-:-:-:-:-:# +# TIDoS Framework # +#-:-:-:-:-:-:-:-:-:-:-:-:# + +#This module requires TIDoS Framework +#https://github.com/0xInfection/TIDoS-Framework + +############################### +class color: + PURPLE = '\033[95m' + CYAN = '\033[96m' + DARKCYAN = '\033[36m' + BLUE = '\033[94m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + CURSIVE = '\033[3m' + END = '\033[0m' + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + #TR1 = '\033[0m\033[48;2;58;49;58m\033[38;2;225;214;225m\uE0B0' + #TR2 = '\033[0m\033[38;2;225;214;225m\uE0B0' + #TR3 = '\033[0m\033[48;2;225;214;225m\033[38;2;58;49;58m\uE0B0' + #TR4 = '\033[0m\033[38;2;58;49;58m\uE0B0' + #TR5 = '\033[0m\033[48;2;225;214;225m\033[38;2;19;14;19m\uE0B0' + #TR6 = '\033[0m\033[48;2;58;49;58m\033[38;2;19;14;19m\uE0B0' + + TR1 = '\033[0m\033[48;2;58;49;58m\033[38;2;225;214;225m|' + TR2 = '\033[0m\033[38;2;225;214;225m|' + TR3 = '\033[0m\033[48;2;225;214;225m\033[38;2;58;49;58m|' + TR4 = '\033[0m\033[38;2;58;49;58m|' + TR5 = '\033[0m\033[48;2;225;214;225m\033[38;2;19;14;19m|' + TR6 = '\033[0m\033[48;2;58;49;58m\033[38;2;19;14;19m|' + VBG = '\033[48;2;19;14;19m' + TITLE = '\033[38;2;85;72;85m' + BAR = '\033[38;2;225;214;225m' + #TR1 = '\033[48;2;58;49;58m\033[38;2;225;214;225m\u25B6' + +W = '\033[0m\033[1;0m' # white (normal) +#R = '\033[1;31m' # red +R = '\033[0m\033[38;2;58;49;58m\033[1m' +#G = '\033[0m\033[1m' # green +#G = '\033[0m\033[48;2;255;255;255m\033[38;2;58;49;58m\033[1m' +G = '\033[0m\033[48;2;225;214;225m\033[38;2;58;49;58m\033[1m' +#G = '\033[0m\033[48;2;85;72;85m\033[38;2;225;214;225m' +#O = '\033[0m\033[1m' # orange +O = '\033[0m\033[48;2;58;49;58m' +B = '\033[0m\033[1m' # blue +#P = '\033[0m\033[1m' # purple +P = '\033[0m\033[38;2;58;49;58m\033[3m' +C = '\033[0m\033[1m' # cyan +GR = '\033[0m\033[1m' # gray +T = '\033[0m\033[1m' # tan +RB = '\033[48;2;58;49;58m' +RC = '\033[0m\033[38;2;58;49;58m\033[3m' +RD = '\033[0m\033[38;2;58;49;58m' +############################### diff --git a/core/__init__.py b/core/__init__.py index 73a0d197..8d1c8b69 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1,11 +1 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -pass + diff --git 
a/core/__pycache__/__init__.cpython-37.pyc b/core/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 00000000..33b2aef7 Binary files /dev/null and b/core/__pycache__/__init__.cpython-37.pyc differ diff --git a/core/__pycache__/colors.cpython-37.pyc b/core/__pycache__/colors.cpython-37.pyc new file mode 100644 index 00000000..697a5602 Binary files /dev/null and b/core/__pycache__/colors.cpython-37.pyc differ diff --git a/core/__pycache__/variables.cpython-37.pyc b/core/__pycache__/variables.cpython-37.pyc new file mode 100644 index 00000000..cf485848 Binary files /dev/null and b/core/__pycache__/variables.cpython-37.pyc differ diff --git a/core/activeban.py b/core/activeban.py deleted file mode 100644 index eccfd03b..00000000 --- a/core/activeban.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -from colors import * -import time - -def activeban(): - - time.sleep(0.4) - print G+' +----------------+' - print G+' | '+O+'ACTIVE RECON'+G+' |' - print G+' +----------------+' - time.sleep(0.3) - print '' - print B+' [1]'+C+' Ping/NPing Enumeration'+W+' (Adaptative+Debug)' - time.sleep(0.1) - print B+' [2]'+C+' Grab HTTP Headers'+W+' (Live Capture)' - time.sleep(0.1) - print B+' [3]'+C+' Find Allowed HTTP Methods'+W+' (Via OPTIONS)' - time.sleep(0.1) - print B+' [4]'+C+' Examine robots.txt and sitemap.xml' - time.sleep(0.1) - print B+' [5]'+C+' Scrape Comments from Webpage'+W+' (Regex Based)' - time.sleep(0.1) - print B+' [6]'+C+' Perform Advanced Traceroute'+W+' (TTL Based)' - time.sleep(0.1) - print B+' [7]'+C+' Find Shared DNS Hosts'+W+' (NameServer Based)' - time.sleep(0.1) - print B+' [8]'+C+' Examine SSL Certificate'+W+' (Absolute)' - time.sleep(0.1) - print B+' [9]'+C+' CMS Detection '+W+'(185+ CMSs supported)' - time.sleep(0.1) - print B+' [10]'+C+' Apache Status Disclosure'+W+' (File Based)' - time.sleep(0.1) - print B+' [11]'+C+' WebDAV HTTP Enumeration'+W+' (SEARCH, PROFIND)' - time.sleep(0.1) - print B+' [12]'+C+' Enumerate Server behind website'+W - time.sleep(0.1) - print B+' [13]'+C+' Alternate Sites'+W+' (User-Agent Based)' - time.sleep(0.1) - print B+' [14]'+C+' Common File Bruteforce'+W+' (5 modules)\n' - time.sleep(0.1) - print B+' [A]'+C+' The Auto-Awesome Module\n' - time.sleep(0.1) - print B+' [99]'+C+' Back\n' - diff --git a/core/activeo.py b/core/activeo.py deleted file mode 100644 index dadbb53b..00000000 --- a/core/activeo.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import os -import time -import subprocess -import random -from random import randint -sys.path.append('modules/ActiveRecon/') - -from piwebenum import * -from grabhead import * -from httpmethods import * -from robot import * -from apachestat import * -from dav import * -from sharedns import * -from commentssrc import * -from sslcert import * -from activeban import * -from filebrute import * -from traceroute import * -from cms import * -from serverdetect import * -from altsites import * -from colors import * - -def activeo(web): - - print " [!] 
Module Selected : Active Reconnaissance\n\n" - activeban() - print '' - time.sleep(0.3) - v = raw_input (''+GR+' [#] \033[1;4mTID\033[0m'+GR+' :> ' + color.END) - print '' - if v.strip() == '1': - print C+' [!] Type Selected : Ping/NPing Enumeration' - piwebenum(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '2': - print C+' [!] Type Selected : Grab HTTP Headers' - grabhead(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '3': - print C+' [!] Type Selected : HTTP Allowed Methods' - httpmethods(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '4': - print C+' [!] Type Selected : robots.txt and sitemap.xml Hunt' - robot(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '5': - print C+' [!] Type Selected : Scrape Comments' - commentssrc(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '6': - print C+' [!] Type Selected '+B+': Traceroute' - traceroute(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '7': - print C+' [!] Type Selected : DNS Hosts' - sharedns(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '8': - print C+' [!] Type Selected : SSL Certificate' - sslcert(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '9': - print C+' [!] Type Selected : CMS Detection' - cms(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '10': - print C+' [!] Type Selected : Apache Status' - apachestat(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '11': - print C+' [!] Type Selected : WebDAV HTTP Enumeration' - dav(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '12': - print C+' [!] Type Selected : Server Detection' - serverdetect(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '13': - print C+' [!] Type Selected : Alternate Sites ' - altsites(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == '14': - print C+' [!] Type Selected : File Bruteforcers' - filebrute(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - activeo(web) - - elif v.strip() == 'A': - print C+' [!] Type Selected : All Modules' - time.sleep(0.5) - print C+' [*] Firing up module -->'+B+' Ping Enum' - piwebenum(web) - print C+' [!] Module Completed -->'+B+' PIng\n' - - time.sleep(1) - print C+' [*] Firing up module -->'+B+' Grab Headers' - grabhead(web) - print C+'\n [!] Module Completed -->'+B+' Grabhead\n' - - time.sleep(1) - print C+' [*] Firing up module -->'+B+' Robots.txt Hunter' - robot(web) - print C+'\n [!] Module Completed -->'+B+' Robot Hunter\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Comments Scraper' - commentssrc(web) - print C+'\n [!] Module Completed -->'+B+' Comments Src\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Traceroute' - traceroute(web) - print C+'\n [!] 
Module Completed -->'+B+' Traceroute\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Shared DNS Servers' - sharedns(web) - print C+'\n [!] Module Completed -->'+B+' Shared DNS Servers\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' SSl Certificate Info' - sslcert(web) - print C+'\n [!] Module Completed -->'+B+' SSl Cert\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' CMS Detection' - cms(web) - print C+'\n [!] Module Completed -->'+B+' CMS Detect\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' WebDAV HTTP Profiling' - dav(web) - print C+'\n [!] Module Completed -->'+B+' WebDAV HTTP Profiling\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Apache Status' - apachestat(web) - print C+'\n [!] Module Completed -->'+B+' Apache Status\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' OS Fingerprinting' - osdetect(web) - print C+'\n [!] Module Completed -->'+B+' OS Detect\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' File bruteforcer' - filebrute(web) - print C+'\n [!] Module Completed -->'+B+' File Bruteforcer\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Server Detection' - serverdetect(web) - print C+'\n [!] Module Completed -->'+B+' Server Detect\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Alt. Sites' - altsites(web) - print C+'\n [!] Module Completed -->'+B+' Alt. Sites\n' - time.sleep(1) - - print C+'\n [!] All scantypes have been tested on target...' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - print C+' [*] Going back to menu...' - time.sleep(3) - os.system('clear') - activeo(web) - - elif v.strip() == '99': - print C+' [*] Back to the menu !' - os.system('clear') - - else: - dope = ['You high dude?','Shit! Enter a valid option','Whoops! Thats not an option','Sorry! You just typed shit'] - print dope[randint(0,3)] - time.sleep(0.7) - os.system('clear') - activeo(web) - diff --git a/core/agree.py b/core/agree.py deleted file mode 100644 index acddd24e..00000000 --- a/core/agree.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import os -import sys -import platform -import time -from agreement import * -from colors import * - -def agree(): - - os.system('clear') - if str(platform.system()) != "Linux": - sys.exit(R+" [!] " + color.UNDERLINE + "\033[91m" + "You are not using a Linux Based OS! Linux is a must-have for this script!" + color.END) - if not os.geteuid() == 0: - sys.exit(" [!] " + color.UNDERLINE + "\033[91m" + "Must be run as root. :) " + color.END) - if 'no' in open('agree').read(): - agreement() - - a1 = raw_input(O+' [0x00] '+G+'Do you agree to these terms and conditions? :> '+C) - if a1 == "yes" or a1 == 'y' or a1 == 'Y' or a1 == 'Yes' or a1 == 'yo' or a1 == 'YES' or a1 == 'yep' or a1 == 'Yep' or a1 == 'YEP': - print G+' [0x01] '+O+'Thats awesome! Move on...' - time.sleep(3) - FILE = open("agree","w") - FILE.write('yes') - FILE.close() - - else: - print O+' [0x0F] '+R+'You have to agree!' 
- time.sleep(1) - sys.exit(0) diff --git a/core/agreement.py b/core/agreement.py deleted file mode 100644 index 9ed9a449..00000000 --- a/core/agreement.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -def agreement(): - - print """ -\033[1;95m - ========================================== - T I D O S F R A M E W O R K v1.5 - ========================================== -\033[1;37m - TIDoS is a open-source toolkit developed as a comprehensive web-app audit framework. - - TIDoS was developed by \033[1;33mPinaxx Robinson\033[1;37m, known by the name \033[1;36m@_tID\033[1;37m aka \033[1;36mThe Infected Drake\033[1;37m of \033[1;33mTeam CodeSploit.\033[1;37m - - This is to make you note that TIDoS was purely developed for auditing purposes. The developer is not responsible for any damage or data loss due to misuse. If you intend to use this for any malicious purposes, use it at your own risk... ;) - - Also by using this tool, you agree to be a awesome person. Try to help others, and do not pollute the community with unwanted stuff. - - You can edit these scripts within this tool as per your own needs, provided you use it only for yourself. If you want to publish TIDoS again in a reformed appearance, give the developer some credits. :) \033[1;91m - - P.S. - If you find any bugs with this tool, report it. - -""" diff --git a/core/auxil.py b/core/auxil.py deleted file mode 100644 index d01916d4..00000000 --- a/core/auxil.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import os -import time -import subprocess -import random -from random import randint -from subprocess import call -sys.path.append('modules/AuxilMods/') -from auxilban import * -from encodeall import * -from honeypot import * -from hashes import * -from imgext import * - -def auxil(web): - - print '' - time.sleep(0.3) - v = raw_input(GR+' [#] \033[1;4mTID\033[0m'+GR+' :> ' + color.END) - print '' - if v == '1': - print ' [!] Type Selected : Generate Hashes' - hashes() - print '\n\n' - raw_input(O+' [+] Press '+GR+'Enter'+O+' to Continue...') - auxilban() - auxil(web) - - elif v == '2': - print ' [!] Type Selected : Encode Strings' - encodeall() - print '\n\n' - raw_input(O+' [+] Press '+GR+'Enter'+O+' to Continue...') - auxilban() - auxil(web) - - elif v == '3': - print ' [!] Type Selected : Extract Metadata' - imgext() - print '\n\n' - raw_input(O+' [+] Press '+GR+'Enter'+O+' to Continue...') - auxilban() - auxil(web) - - elif v == '4': - print ' [!] Type Selected : Honeypot Detector' - honeypot() - print '\n\n' - raw_input(O+' [+] Press '+GR+'Enter'+O+' to Continue...') - auxilban() - auxil(web) - - elif v == '99': - print GR+' [*] Going back!' - time.sleep(0.7) - os.system('clear') - - else: - dope = ['You high dude?','Shit! Enter a valid option','Whoops! Thats not an option','Sorry! 
You just typed shit'] - print dope[randint(0,3)] - time.sleep(0.7) - os.system('clear') - auxilban() - auxil(web) diff --git a/core/auxilban.py b/core/auxilban.py deleted file mode 100644 index d15b16b4..00000000 --- a/core/auxilban.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -from colors import * -import time -import os - -def auxilban(): - - os.system('clear') - print " [!] Module Selected : Auxillary Modules\n" - time.sleep(0.4) - print C+''' -\033[1;37m - ' . + . . ' + ' . . - . ' + . * . - + ' . + - . * . . . * . * - . \033[1;33m _\033[1;37m . . . . - . . \033[1;33m _ / | \033[1;37m . . * \033[1;33m _ \033[1;37m . . + - \033[1;33m| \_| | \033[1;37m + . \033[1;33m| | __ - \033[1;33m _ | |\033[1;37m . _ \033[1;33m| |/ | - + \033[1;33m| \ | \033[1;36m _/\_ \033[1;33m| | / | \ \033[1;37m + / - \033[1;33m| | \ \033[1;36m+/_\/_\+ \033[1;33m| | / | \ \033[1;37m . |\033[1;34m - ____\033[1;33m/____\--...\___ \033[1;36m\_||_/\033[1;34m ___...\033[1;33m|__\_\033[1;34m..\033[1;33m|____\____/\033[1;34m_______/_\033[1;34m - . . \033[1;36m|_|__|_| \033[1;34m . . . - . . . \033[1;36m_/ /__\ \_\033[1;34m . . . . - . . . . . . - . ' ' ' ' - ' \033[1;33m-=[\033[1;31m A U X I L L A R I E S \033[1;33m]=-\033[1;34m . ' . - . ' . . ' . -''' - time.sleep(0.3) - print '' - print B+' [1]'+C+' Generate Hashes from Strings'+W+'(4 Types) ' - time.sleep(0.1) - print B+' [2]'+C+' Encode Payload or Strings'+W+' (7 Types)' - time.sleep(0.1) - print B+' [3]'+C+' Extract Metadata from Images'+W+' (EXIF Data)\n' - time.sleep(0.1) - print B+' [4]'+C+' HoneyPot Probability'+W+' (ShodanLabs HoneyScore)\n' - time.sleep(0.1) - print B+' [99]'+C+' Back\n' - diff --git a/core/banner.py b/core/banner.py deleted file mode 100644 index 6db086fd..00000000 --- a/core/banner.py +++ /dev/null @@ -1,144 +0,0 @@ -# coding: utf-8 -#!/usr/bin/env python - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import os -from random import randint -from colors import * - -def banner(): - - header = """ -\033[1;31m - T H E - - ▄▀▀▀█▀▀▄ ▄▀▀█▀▄ ▄▀▀█▄▄ ▄▀▀▀▀▄ ▄▀▀▀▀▄ \033[1;37m - █ █ ▐ █ █ █ █ ▄▀ █ █ █ ██ ▐ - ▐ █ ▐ █ ▐ ▐ █ █ █ █ ▀▄ - █ █ █ █ ▀▄ ▄▀ ▀▄ █ \033[1;31m - ▄▀ ▄▀▀▀▀▀▄ ▄▀▄▄▄▄▀ ▀▀▀▀ █▀▀▀ - █ █ █ █ ▐ ▐ - ▐ ▐ ▐ ▐ - F R A M E W O R K - -""" - - - oblique = C + """ -\033[1;36m - T H E - - ███ ▄█ ████████▄ ▄██████▄ ▄████████ - ▀█████████▄ ███ ███ ▀███ ███ ███ ███ ███ \033[1;37m - ▀███▀▀██ ███▌ ███ ███ ███ ███ ███ █▀ - ███ ▀ ███▌ ███ ███ ███ ███ ███ - ███ ███▌ ███ ███ ███ ███ ▀███████████ - ███ ███ ███ ███ ███ ███ ███ \033[1;36m - ███ ███ ███ ▄███ ███ ███ ▄█ ███ - ▄████▀ █▀ ████████▀ ▀██████▀ ▄████████▀ - - F R A M E W O R K -""" - - modular = O + """ -\033[1;33m - ___________________________ - |\_________________________/|\ - || || \ - || \033[1;36mThe \033[1;33m|| | - || \033[1;36m TIDoS \033[1;33m|| | - || \033[1;36m Framework \033[1;33m|| | - || \033[1;33m|| | - || \033[1;34mWeb Application Audit \033[1;33m|| | - || \033[1;34m Framework \033[1;33m|| | - || \033[1;33m|| | - || \033[1;37mFrom: CodeSploit \033[1;33m|| / - ||_________________________|| / - |/_________________________\|/ - __\_________________/__/| - |_______________________|/ - ________________________ - /\033[0m\033[37moooo oooo oooo oooo 
\033[1;33m/| - /\033[0m\033[37mooooooooooooooooooooooo\033[1;33m/ / - /\033[0m\033[37mooooooooooooooooooooooo\033[1;33m/ / - /C=_____________________/_/ -""" - - fb = C + """ -\033[1;36m - ,------,-------------------------------------,------. - | | | | - | | \033[1;37mThe TIDoS Framework | | - | | \033[1;34m< CodeSploit > \033[1;36m | | - | | | | - | |-------------------------------------| | - | ||...................................|| | - | || || | - | || \033[1;33mHack Facebook? (y/n) \033[1;36m|| | - | || \033[1;34m---------------------- \033[1;36m|| | - | ||_______ \033[1;33m$ > (Input) \033[1;36m_______|| | - | || \033[1;32m$$$$$ \033[1;36m| | \033[1;32m$$$$$\033[1;36m || | - | ||-------'-------------------'-------|| | - `------'|___________________________________|`------' - -""" - codesploit = GR + """ - - MMMMMNMNMMMM$%&. - .DMMM lM$%. - .MMN \M$%,. - MN \M%$.. - .M. \033[1;36mXXXXXXXXXXXXX \033[1;37m\M%$M - .M \033[1;36m XXXXXXXXXXXXX \033[1;37m\l$%M. - M \033[1;36m XXX \033[1;37m\M$%M:. - M \033[1;36m XXX \033[1;37m\$MMM: - M \033[1;36m XXX \033[1;34mXXXXXXX \033[1;37m:$%MM: - :M \033[1;36m XXX \033[1;34mXX \033[1;37m :$%MMM: - M \033[1;36m XXX \033[1;34mXXXXXXX \033[1;37m M%$MM: - :M: \033[1;36m XXX \033[1;34m XX \033[1;37m M$%M:' - NM \033[1;36m XXXXXX\033[1;34mXXXXXXX\033[1;36mm \033[1;37m .M$%MM: - IMM. \033[1;36m XXXXXXXXXXXXX\033[1;36mm \033[1;37m.MD%4M' - :MM . .MM$%M' - $MM .MMM$%"' - MMMM. MMMMMM" - MMMMMMMMMMMMMMMMMM:*" -\033[1;33m - ╔═════════════════════════╗ - █ \033[1;31mC O D E S P L O I T\033[1;33m █ - ╚═════════════════════════╝ -""" - swan = """ -\033[1;33m - ...,ooooooooo...... - .o8888888888888888888888888o. - .o888888888888888888888888888888888o. - o8888888888A88"V888888888888888888888888o - o88888887"8" V V888 88888888888888888888o - o88888888 VV0 888888888888888888888 - o888888888 !/ 88888888888888888888o - .88888888888 88888V" "V8888888888. - o88888888888v \033[1;37mT H E \033[1;33m 8888" v8 8888888888o - 88888888888v 8888v v88 88888888888 - 888888888888 \033[1;37mT I D O S\033[1;33m 88888v "8888888888888 - 88888888888V ""M888 "888P8888888 - 88888888888v \033[1;37mF R A M E W O R K \033[1;33m 88P8888888 -\033[1;36m ______\033[1;33m8888888888888v.........................VF8888888\033[1;36m_______ - :::::::::::::::::::' :::::::::::::::: - ::::::::::::::::: .::::::: .:::::::::::::::::\033[1;34m - ::::::::::::::: \033[1;36mFrom:\033[1;34m ::::::: .::::::::::::::::::: - ::::::::::::::: \033[1;37mCodeSploit\033[1;34m :::::: ::: :::::::::::::::: - :::::::::::::::. ::::::. :: .::::::::::::::::\033[1;36m - :::::::::::::::: :::::::. .::::::::::::::::: - :::::::::::::::. . 
:::::::::::::::::::::::::::: -""" - headers = [header, oblique, modular, codesploit, fb, swan] - os.system('clear') - print headers[randint(0,5)] - - diff --git a/core/banner1.py b/core/banner1.py deleted file mode 100644 index 1c572d5d..00000000 --- a/core/banner1.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import time -from time import sleep -from colors import * - -def banner1(): - - print B+'[---] '+C+'The TIDoS Framework \033[36m| \033[1;37mVersion v1.5 \033[1;34m[---]' - sleep(0.2) - print B+'[---] [---]' - print B+'[---] \033[1;31m~ Author \033[1;31m: \033[1;33m@_tID ~ \033[1;34m[---]' - sleep(0.2) - print B+'[---] '+O+'~ github.com / \033[1;32mtheInfectedDrake ~ \033[1;34m[---]' - sleep(0.2) - print B+'[---] [---]' - sleep(0.2) - print B+'[---] \033[1;35m5 Phases | \033[1;31m14 Sub-Phases | \033[1;37m102 Modules \033[1;34m[---]' - sleep(0.2) - print '' - print B+' Welcome to '+C+'The TIDoS Framework (TTF)' - sleep(0.2) - print GR+' The TIDoS Framework is a project by '+R+'Team CodeSploit\n' - diff --git a/core/colors.py b/core/colors.py deleted file mode 100644 index 96ed4433..00000000 --- a/core/colors.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python2 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -############################### -class color: - PURPLE = '\033[95m' - CYAN = '\033[96m' - DARKCYAN = '\033[36m' - BLUE = '\033[94m' - GREEN = '\033[92m' - YELLOW = '\033[93m' - RED = '\033[91m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - END = '\033[0m' - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - -W = '\033[1;0m' # white (normal) -R = '\033[1;31m' # red -G = '\033[1;32m' # green -O = '\033[1;33m' # orange -B = '\033[1;34m' # blue -P = '\033[1;35m' # purple -C = '\033[1;36m' # cyan -GR = '\033[1;37m' # gray -T = '\033[1;93m' # tan -############################### diff --git a/core/crawlers.py b/core/crawlers.py deleted file mode 100644 index 9c27ff3b..00000000 --- a/core/crawlers.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding : utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This script is a part of TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import platform -import os -import time -import random -import subprocess -from random import * -sys.path.append('modules/ScanEnum/') - -from crawler1 import * -from crawler2 import * -from crawler3 import * -from crawlersban import * -from colors import * - -def crawlers(web): - - time.sleep(0.3) - print ' [!] Module Selected : Crawlers' - time.sleep(0.4) - crawlersban() - v = raw_input(O+' [#] TID :> ') - if v.strip() == '1': - print B+' [!] Module Selected :'+C+' Crawler (Depth1)' - crawler1(web) - time.sleep(1) - crawlers(web) - - elif v.strip() == '2': - print B+' [!] Module Selected :'+C+' Crawler (Depth 2)' - crawler2(web) - time.sleep(1) - crawlers(web) - - elif v.strip() == '3': - print B+' [!] Module Selected :'+C+' Crawler (Depth 3)' - crawler3(web) - time.sleep(1) - crawlers(web) - - elif v.strip() == '99': - print GR+' [*] Going back...' 
- time.sleep(0.5) - os.system('clear') - - elif v.strip() == 'A': - print W+'\n [!] Module Automater Initialized...' - sleep(0.5) - print B+' [*] Initializing Scan Type :'+C+' Crawler (Depth 1)' - crawler1(web) - print B+'\n [!] Scan Type Completed :'+C+' Crawler 1\n' - sleep(0.5) - print B+' [!] Initializing Scan Type :'+C+' Crawler (Depth 2)' - crawler2(web) - print B+'\n [!] Scan Type Completed :'+C+' Crawler 2\n' - sleep(0.5) - print B+' [!] Initializing Scan Type :'+C+' Crawler (Depth 3)' - crawler3(web) - print B+'\n [!] Scan Type Completed :'+C+' Crawler 3\n' - print G+' [+] All modules successfully completed!' - raw_input(GR+' [+] Press '+O+'Enter '+GR+'to continue...') - crawlers(web) - - else: - dope = ['You high dude?','Shit! Enter a valid option','Whoops! Thats not an option','Sorry! You just typed shit'] - print ' [-] '+dope[randint(0,3)] - sleep(1) - crawlers(web) - diff --git a/core/crawlersban.py b/core/crawlersban.py deleted file mode 100644 index b820d35e..00000000 --- a/core/crawlersban.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding : utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This script is a part of TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import time -from time import sleep -from colors import * - -def crawlersban(): - - print O+'\n +-----------------+' - print O+' | '+G+' CRAWLER TYPES '+O+' |' - print O+' +-----------------+\n' - print '' - sleep(0.2) - print B+" [1] "+C+"Crawler \033[1;0m(Depth 1)" - sleep(0.2) - print B+' [2] '+C+'Crawler \033[1;0m(Depth 2) ' - sleep(0.2) - print B+' [3] '+C+'Crawler \033[1;0m(Depth 3) ' - sleep(0.2) - print B+'\n [A] '+C+'Test both crawlers 1 by 1 ' - sleep(0.2) - print B+'\n [99] '+C+'Back \n' - diff --git a/core/database/database_module.py b/core/database/database_module.py new file mode 100644 index 00000000..1a7c73ea --- /dev/null +++ b/core/database/database_module.py @@ -0,0 +1,113 @@ +import sqlite3
+
+# This module contains three functions:
+# 1. save_data - inserts data into the database, and automatically creates tables and
+#    organizes the data as necessary.
+# 2. retrieve_data - generates SQLite queries to retrieve the requested data from the
+#    database and returns the data.
+# 3. get_info - reads the database and returns a dictionary that acts as an index to
+#    the data in the database. This dictionary is used by the menu function to build
+#    out the menu.
+# save_data and retrieve_data take the following arguments:
+# database = database file name in the format "databasename.db". This will be the same
+#    throughout the session.
+# module = the name of the top-level module you're in. Can't have spaces or special
+#    characters. It must EXACTLY match one of the following:
+#    "ReconANDOSINT", "ScanANDEnum", "VulnAnalysis", "Exploitation", or "AuxModules"
+# lvl1, lvl2, lvl3 = the names of the next levels of modules, on down to the one you're
+#    in. If you don't go all the way down to lvl3, pass an empty string '' (the queries
+#    below compare against '', not NULL). Can have spaces. Should exactly match the
+#    name of the module as written in the menu.
+# host = the host the data was gathered from (accepted but not currently stored).
+# data = should be the data generated by the module. It will be unchanged when it is
+#    passed in and out of the database.
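+# A minimal usage sketch (hypothetical level names and output variable; real calls
+# must use level names exactly as written in the menus):
+#
+#   save_data("sessionresults.db", "ReconANDOSINT", "Passive Reconnaissance",
+#             "WhoIS Lookup", "", "example.com", whois_output)
+#   cached = retrieve_data("sessionresults.db", "ReconANDOSINT",
+#                          "Passive Reconnaissance", "WhoIS Lookup", "", 1)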
+
+def save_data(database, module, lvl1, lvl2, lvl3, host, data):
+    connection = sqlite3.connect(database)
+    cursor = connection.cursor()
+    # Table names cannot be bound as SQL parameters, so the (whitelisted) module
+    # name is interpolated; all values go through '?' placeholders instead.
+    check_table = 'SELECT name FROM sqlite_master WHERE type=\'table\' AND name=?'
+    cursor.execute(check_table, (module,))
+    checker = cursor.fetchone()
+    if checker is None:
+        create_table = 'CREATE TABLE {} (\
+            id INTEGER PRIMARY KEY, \
+            lvl1 text not null, \
+            lvl2 text, \
+            lvl3 text, \
+            number int, \
+            data blob);'.format(module)
+        cursor.execute(create_table)
+        connection.commit()
+    # Number this entry one higher than any existing entry for the same module path.
+    check_entries = 'SELECT number FROM {} WHERE lvl1=? AND lvl2=? AND lvl3=?'.format(module)
+    cursor.execute(check_entries, (lvl1, lvl2, lvl3))
+    high = [row[0] for row in cursor.fetchall()]
+    highest = max(high) + 1 if high else 1
+    insert_into_table = 'INSERT INTO {} (lvl1, lvl2, lvl3, number, data) VALUES (?, ?, ?, ?, ?);'.format(module)
+    table_entry_parameters = (lvl1, lvl2, lvl3, highest, data)
+    cursor.execute(insert_into_table, table_entry_parameters)
+    connection.commit()
+    connection.close()
+
+def retrieve_data(database, module, lvl1, lvl2, lvl3, num):
+    connection = sqlite3.connect(database)
+    cursor = connection.cursor()
+    # Unused levels arrive as empty strings, so one parameterised query covers
+    # every depth.
+    select_from_table = 'SELECT data FROM {} WHERE lvl1=? AND lvl2=? AND lvl3=? AND number=?'.format(module)
+    cursor.execute(select_from_table, (lvl1, lvl2, lvl3, num))
+    returned_data = cursor.fetchall()
+    connection.close()
+    return returned_data[0][0]
+
+# This function receives only the file name of the database. It reads what is in the
+# database and returns a dictionary that acts as an index to the data saved in the
+# database. This dictionary is read by the menu function to build out the menu.
+def get_info(database):
+    modules = ['ReconANDOSINT', 'ScanANDEnum', 'VulnAnalysis', 'Exploitation', 'AuxModules']
+    saved_modules = []
+    data_saved = {}
+    connection = sqlite3.connect(database)
+    cursor = connection.cursor()
+    for module in modules:
+        check_table = 'SELECT name FROM sqlite_master WHERE type=\'table\' AND name=?'
+        cursor.execute(check_table, (module,))
+        check = cursor.fetchone()
+        if check is not None:
+            saved_modules.append(module)
+    for module in saved_modules:
+        if module not in data_saved.keys():
+            data_saved[module] = {}
+        ctr = 1
+        while True:
+            cmd = 'SELECT lvl1, lvl2, lvl3, number FROM {} WHERE id={}'.format(module, ctr)
+            cursor.execute(cmd)
+            grab_data = cursor.fetchone()
+            if grab_data is None:
+                break
+            else:
+                if grab_data[0] not in data_saved[module].keys():
+                    data_saved[module][grab_data[0]] = {}
+                if grab_data[1] != '':
+                    if grab_data[1] not in data_saved[module][grab_data[0]].keys():
+                        data_saved[module][grab_data[0]][grab_data[1]] = {}
+                if grab_data[2] == '':
+                    if grab_data[1] == '':
+                        data_saved[module][grab_data[0]].update( {grab_data[3] : "Data"} )
+                    else:
+                        data_saved[module][grab_data[0]][grab_data[1]].update( {grab_data[3] : "Data"} )
+                else:
+                    # Initialise the lvl3 bucket before updating it; otherwise the
+                    # first lvl3 entry raises a KeyError.
+                    if grab_data[2] not in data_saved[module][grab_data[0]][grab_data[1]].keys():
+                        data_saved[module][grab_data[0]][grab_data[1]][grab_data[2]] = {}
+                    data_saved[module][grab_data[0]][grab_data[1]][grab_data[2]].update( {grab_data[3] : "Data"} )
+            ctr += 1
+    connection.close()
+    return data_saved
\ No newline at end of file
diff --git a/core/database/db_menu.py b/core/database/db_menu.py
new file mode 100644
index 00000000..1dc1ebaf
--- /dev/null
+++ b/core/database/db_menu.py
@@ -0,0 +1,108 @@
+from database_module import get_info, retrieve_data
+import sys
+
+menu_dict = {
+    'ReconANDOSINT' : 'Reconnaissance & OSINT',
+    'ScanANDEnum' : 'Scanning & Enumeration',
+    'VulnAnalysis' : 'Vulnerability Analysis',
+    'Exploitation' : 'Exploitation',
+    'AuxModules' : 'Auxiliary Modules'
+    }
+db_name = "sessionresults.db"
+query_list = [db_name]
+error_message = "Sorry, please enter one of the choices listed."
+menu_message = "Please make your selection: "
+input_cursor = "Input > "
+main_exit_message = "E) Exit."
+higher_menu_exit_message = "0) Exit to higher menu."
+exit_message = "Now exiting. Goodbye."
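+# Illustrative state (hypothetical choices): query_list grows like a breadcrumb
+# trail as the user descends the menus, e.g. after picking a top-level module,
+# a lvl1 entry and scan number 2 it would hold
+#
+#   ["sessionresults.db", "ReconANDOSINT", "Passive Reconnaissance", 2]
+#
+# query_db() below fills the unused lvl2/lvl3 slots with empty strings before
+# handing the list to retrieve_data().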
+ +def query_db(query_list): + db, module, lvl1 = query_list[0], query_list[1], query_list[2] + if len(query_list) == 4: + lvl2, lvl3, num = "", "", query_list[3] + elif len(query_list) == 5: + lvl2, lvl3, num = query_list[3], "", query_list[4] + else: + lvl2, lvl3, num = query_list[3], query_list[4], query_list[5] + print(retrieve_data(db, module, lvl1, lvl2, lvl3, num)) + +def build_db_menu(a_list, b_list, selection): + print(menu_message) + while True: + ctr = 0 + while ctr < len(a_list): + if selection == "Data": + print("{}) Scan {}".format(str(ctr + 1), a_list[ctr])) + elif b_list != "None": + print("{}) {}".format(str(ctr + 1), b_list[ctr])) + else: + print("{}) {}".format(str(ctr + 1), a_list[ctr])) + ctr += 1 + if selection != "None": + print(higher_menu_exit_message) + print(main_exit_message) + cmd = input(input_cursor) + if cmd == "e" or cmd == "E" or cmd == "exit" or cmd == "Exit" or cmd == "EXIT": + print(exit_message) + sys.exit() + elif cmd == "0": + del query_list[-1] + return cmd + elif int(cmd) <= len(a_list): + query_list.append(a_list[int(cmd) - 1]) + if selection == "None": + return a_list[int(cmd) - 1] + elif selection[a_list[int(cmd) - 1]] == "Data": + print(selection[a_list[int(cmd) - 1]]) + query_db(query_list) + del query_list[-1] + else: + return a_list[int(cmd) - 1] + else: + print(error_message) + +def level_five_menu(choice1, choice2, choice3, choice4, menu_data): + a_list = list(menu_data[choice1][choice2][choice3][choice4].keys()) + selection = menu_data[choice1][choice2][choice3][choice4] + result = build_db_menu(a_list, "None", selection) + if result == "0": + level_four_menu(choice1, choice2, choice3, menu_data) + +def level_four_menu(choice1, choice2, choice3, menu_data): + a_list = list(menu_data[choice1][choice2][choice3].keys()) + selection = menu_data[choice1][choice2][choice3] + result = build_db_menu(a_list, "None", selection) + if result == "0": + level_three_menu(choice1, choice2, menu_data) + else: + level_five_menu(choice1, choice2, choice3, result, menu_data) + +def level_three_menu(choice1, choice2, menu_data): + a_list = list(menu_data[choice1][choice2].keys()) + selection = menu_data[choice1][choice2] + result = build_db_menu(a_list, "None", selection) + if result == "0": + level_two_menu(choice1, menu_data) + else: + level_four_menu(choice1, choice2, result, menu_data) + +def level_two_menu(choice, menu_data): + a_list = list(menu_data[choice].keys()) + selection = menu_data[choice] + result = build_db_menu(a_list, "None", selection) + if result == "0": + menu(menu_data) + else: + level_three_menu(choice, result, menu_data) + +def menu(menu_data): + a_list = list(menu_data.keys()) + b_list = [] + for item in a_list: + b_list.append(menu_dict[item]) + result = build_db_menu(a_list, b_list, "None") + level_two_menu(result, menu_data) + +menu_data = get_info(db_name) +menu(menu_data) \ No newline at end of file diff --git a/core/database/tidos.db b/core/database/tidos.db new file mode 100644 index 00000000..d4b04e9c Binary files /dev/null and b/core/database/tidos.db differ diff --git a/core/dispmenu.py b/core/dispmenu.py deleted file mode 100644 index 4d083fe9..00000000 --- a/core/dispmenu.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import time -from colors import * - -def dispmenu(): - - print ''' - -\033[1;37m . 
+ \033[1;34m ______ \033[1;37m . . - +. \033[1;34m / ==== \ \033[1;37m . + . . - . . \033[1;36m ,-~--------~-. \033[1;37m * + - \033[1;36m,^\033[1;33m ___ \033[1;36m^.\033[1;37m + * . . . - * * \033[1;36m / \033[1;33m.^ ^. \033[1;36m\ \033[1;37m . \033[1;32m _ | _ - \033[1;36m| \033[1;33m| o ! \033[1;36m|\033[1;37m . \033[1;32m __ \ /--. - . \033[1;36m|\033[1;34m_ \033[1;33m'.___.' \033[1;34m_\033[1;36m|\033[1;37m \033[1;32mI__/_\ / )}\033[1;36m======> \033[1;37m + - \033[1;36m| \033[1;34m"'----------------"\033[1;36m|\033[1;37m + \033[1;32m _[ _(\033[1;33m0\033[1;32m): ))\033[1;36m========> - + . \033[1;36m! !\033[1;37m . \033[1;32mI__\ / \. ]}\033[1;36m======> \033[1;37m . - . \033[1;36m \ \033[1;37mTIDoS Prober \033[1;36m/ \033[1;37m \033[1;32m ~^-.--' - \033[1;36m^. .^ \033[1;37m . \033[1;32m | \033[1;37m +. * - . \033[1;36m "-..______.,-" \033[1;37m. . * - + . . + * . - \033[1;33m-=[ \033[1;31mL E T S S T A R T\033[1;33m ]=-\033[1;37m - + . ' . + + - * . + * . * . -''' - print O+'\n Choose from the options below :\n' - time.sleep(0.2) - print B+' [1] \033[1;36mReconnaissance & OSINT'+W+' (45 modules)' - time.sleep(0.1) - print B+' [2] \033[1;36mScanning & Enumeration'+W+' (14 modules)' - time.sleep(0.1) - print B+' [3] \033[1;36mVulnerability Analysis'+W+' (35 modules)' - time.sleep(0.1) - print B+' [4] \033[1;36mExploitation (beta)'+W+' (only 1)' - time.sleep(0.1) - print B+' [5] \033[1;36mAuxillary Modules'+W+' (3 modules)\n' - time.sleep(0.1) - print B+' [99] \033[1;36mSay "alvida"! (Exit TIDoS)\n' - time.sleep(0.1) - diff --git a/lib/bs4/beautifulsoup4.egg-info/dependency_links.txt b/core/doc/.gitignore similarity index 100% rename from lib/bs4/beautifulsoup4.egg-info/dependency_links.txt rename to core/doc/.gitignore diff --git a/core/doc/CONTRIBUTING b/core/doc/CONTRIBUTING new file mode 100644 index 00000000..e679f8b5 --- /dev/null +++ b/core/doc/CONTRIBUTING @@ -0,0 +1,4 @@ +Contributing to TIDoS Framework +--------------------------------- + + diff --git a/core/doc/DISCLAIMER b/core/doc/DISCLAIMER new file mode 100644 index 00000000..17c1586e --- /dev/null +++ b/core/doc/DISCLAIMER @@ -0,0 +1,4 @@ +DISCLAIMER +---------- + +TIDoS Attack was provided as an open-source, royalty-free penetration testing toolkit. It has capable modules in various phases which can unveil potential dangerous flaws in various web-applications which can further be exploited maliciously. Therefore the author as well as the contrbutors assume no liability for misuse of this toolkit. Usage of TIDoS Attack for testing or exploiting websites without prior mutual consent can be considered as an illegal activity. It is the final user's responsibility to obey all applicable local, state and federal laws. diff --git a/agree b/core/doc/choice similarity index 100% rename from agree rename to core/doc/choice diff --git a/core/doc/local b/core/doc/local new file mode 100644 index 00000000..8d1c8b69 --- /dev/null +++ b/core/doc/local @@ -0,0 +1 @@ + diff --git a/core/doc/man/tidos.1 b/core/doc/man/tidos.1 new file mode 100644 index 00000000..211f0ce3 --- /dev/null +++ b/core/doc/man/tidos.1 @@ -0,0 +1,144 @@ +.TH VAILE 1 2020-03-23 "" + +.SH NAME +.P +\fBVaile\fR — Metasploit\-like pentest framework derived from TIDoS + +.SH SYNOPSIS +.P +\fBVaile\fR [\fIOPTION\fR]... + +.SH DESCRIPTION +.P +\fBVaile\fR is a versatile collection of tools for \fIwebapp\fR security testing. 
It was designed to help penetration testers during all phases of an operation, especially reconnaissance, scanning and vulnerability analysis. + +.P +It features 3 easy\-to\-use interfaces: an interactive shell, an inline CLI and a graphical user interface powered by Qt. Its modules are easily comprehensible and fully verbose, and use \fImultiprocessing\fR to make attacks more efficient. If requested, TIDoS will also pipe attacks through \fITor\fR, rendering your attacks more anonymous and difficult to track. + +.P +The \fBVaile\fR framework is module\-based, so users can easily create their own modules, or edit the built\-in ones, as long as they respect the general structure of the existing modules. + +.P +Its structure allows it to evolve and be easily adapted to new features and modules. Currently, it has 108 modules ready to be used. + +.SH OPTIONS +.P +Running \fBVaile\fR without any argument starts an interactive, metasploit\-like shell, with all settings set to default. + +.TP +You can also specify a target URL, combined with a module, to execute this module on a given target in a quicker and easier way. +\fB$ TIDoS \-v http://example.com \-l pathtrav\fR + +.TP +You can also use a previously saved \fIsession\fR file as an argument, to use it instead of a target URL. Sessions can (and need to) be saved with the \fBsessions\fR command. +\fB$ TIDoS \-v saarsec \-l pathtrav \-s\fR + +.TP +Using TIDoS's \fBVAL\fR file format, you can fully automate attacks, since it sets targets, modules and options automatically. Load a file up this way: +\fB$ TIDoS \-c syn.val\fR + +.TP +When attacking via the CLI, you can also use the \fB\-\-tor\fR flag to pipe attacks through the Tor network, like this: +\fB$ TIDoS \-v http://example.com \-l pathtrav \-\-tor\fR + +.TP +Finally, \fBVaile\fR can be run as a graphical user interface, using and wrapping around the core framework. +\fB$ TIDoS \-\-app\fR + +.TP +For inline help and more options, use \fBVaile \-\-help\fR + +.SH INTERFACE +.P +The main interface of the \fBVaile\fR framework is its interactive, metasploit\-like console. + +.P +\fBTO GET HELP\fR on a TIDoS command, you can use "\fBhelp \fR" or "\fB? \fR". To list all available commands, use "\fBhelp\fR" or "\fB?\fR" without an argument. This will also give a short summary of the commands, as listed below.
+ +.P +\fICommands:\fR + +.TP +\fBattack\fR +Attack specified target(s) +.TP +\fBclear\fR +Clear terminal +.TP +\fBcreds\fR +Handle target credentials +.TP +\fBfetch\fR +Check for and install updates +.TP +\fBfind\fR +Search a module +.TP +\fBhelp\fR +Show help message +.TP +\fBinfo\fR +Show description of current module +.TP +\fBintro\fR +Display Intro +.TP +\fBleave\fR +Leave module +.TP +\fBlist\fR +List all modules of a category +.TP +\fBload\fR +Load module +.TP +\fBnetinfo\fR +Show network information +.TP +\fBopts\fR +Show options of current module +.TP +\fBphpsploit\fR +Load the phpsploit framework (needs to be downloaded externally) +.TP +\fBprocesses\fR +Set number of processes in parallelisation +.TP +\fBq\fR +Terminate TIDoS session +.TP +\fBsessions\fR +Interact with cached sessions +.TP +\fBset\fR +Set option value of module +.TP +\fBtor\fR +Pipe Attacks through the Tor Network +.TP +\fBvicadd\fR +Add Target to list +.TP +\fBvicdel\fR +Delete Target from list +.TP +\fBviclist\fR +List all targets + +.SH ISSUES +.TP +To submit any issue, bug or proposal, please send it in TIDoS's issues section: +https://github.com/VainlyStrain/TIDoS/issues + +.SH CONTRIBUTE +.TP +If you want to contribute to \fBVaile\fR, submit a module, patch, or anything else, take a look at the \fBCONTRIBUTING\fR file, in the core/doc directory + +.SH LICENCE +.P +This software is under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 + + +.\" man code generated by txt2tags 2.5 (http://txt2tags.sf.net) +.\" cmdline: txt2tags -q -t man -i man.txt2tags -o TIDoS.1 + diff --git a/core/doc/mystery b/core/doc/mystery new file mode 100644 index 00000000..396a0ba2 --- /dev/null +++ b/core/doc/mystery @@ -0,0 +1 @@ +yes \ No newline at end of file diff --git a/core/doc/vailyn b/core/doc/vailyn new file mode 100644 index 00000000..8d1c8b69 --- /dev/null +++ b/core/doc/vailyn @@ -0,0 +1 @@ + diff --git a/core/doc/version b/core/doc/version new file mode 100644 index 00000000..52837484 --- /dev/null +++ b/core/doc/version @@ -0,0 +1 @@ +2.0.5-1 diff --git a/core/docker/Dockerfile b/core/docker/Dockerfile new file mode 100644 index 00000000..309b40fa --- /dev/null +++ b/core/docker/Dockerfile @@ -0,0 +1,41 @@ +# Pulling base image +FROM ubuntu:18.04 + +# Install TIDoS-Framework dependencies +RUN apt update && \ + apt install --install-recommends -y \ + sudo \ + libncurses5 \ + apt-utils \ + dialog \ + libxml2 \ + nmap \ + git \ + nano \ + xcb \ + tcpdump \ + libexiv2-dev \ + build-essential \ + python-xmpp \ + python-dev \ + python3-pip \ + libmysqlclient-dev \ + tor \ + konsole \ + && \ + rm -rf /var/lib/apt/lists/* && \ + apt-get clean + +# Installing TIDoS-Framework +RUN git clone https://github.com/0xinfection/tidos-framework.git && \ + cd tidos-framework && \ + python3 -m pip install --upgrade --force pip && \ + python3 -m pip install --upgrade --force wheel && \ + python3 -m pip install -r requirements.txt && \ + mkdir -v -p /opt/TIDoS/ && \ + cp -r -v * /opt/TIDoS/ && \ + cp -v tmp/tidos /usr/bin/tidos && \ + export EDITOR=nano && \ + chmod -R 755 /opt/TIDoS/* && \ + chmod -v 755 /usr/bin/tidos && \ + cd .. 
&& rm -rf tidos-framework diff --git a/core/exploits.py b/core/exploits.py deleted file mode 100644 index 8e4822a2..00000000 --- a/core/exploits.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import time -import os -from colors import * -sys.path.append('modules/ExploitTid/') - -from shellshock_exp import * - -def exploits(web): - - print '' - time.sleep(0.3) - v = raw_input (''+GR+' [#] \033[1;4mTID\033[0m'+GR+' :> ' + color.END) - print '' - if v == '1': - print ' [!] Type Selected : Shellshock Exploit' - shellshock_exp(web) - print O+' [!] Completed!' - time.sleep(1) - print '\n\n' - os.system('clear') - diff --git a/core/exploitsban.py b/core/exploitsban.py deleted file mode 100644 index 85d59bc4..00000000 --- a/core/exploitsban.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import os, time - -def exploitsban(): - - os.system('clear') - print " [!] Module Selected : Exploits\n\n" - time.sleep(0.5) - print ''' - \033[1;33m|\ \033[1;33m/) -\033[1;37m /\_\033[1;33m\\\033[1;37m__ \033[1;33m(_// -\033[1;37m| `>\-` \033[1;36m_._ \033[1;33m//`) -\033[1;37m \ /` \033[1;33m\\\033[1;36m _.-\033[1;34m:::\033[1;36m`-._ \033[1;33m// -\033[1;37m ` \033[1;33m\|\033[1;36m` \033[1;34m::: \033[1;36m `|\033[1;33m// \033[1;37m[0x00] \033[1;31mE X P L O I T S -\033[1;36m | \033[1;34m ::: \033[1;36m|\033[1;33m/ -\033[1;36m |\033[1;34m.....:::.....\033[1;36m| \033[1;31mC A S T L E \033[1;37m[0x00] -\033[1;36m |\033[1;34m:::::::::::::\033[1;36m| -\033[1;36m | \033[1;34m::: \033[1;36m| -\033[1;36m \ \033[1;34m ::: \033[1;36m/ -\033[1;36m \ \033[1;34m ::: \033[1;36m / \033[1;37m" [0x00] \033[1;33mCode the Exploits -\033[1;36m `-. \033[1;34m:::\033[1;36m .-' \033[1;33mand - \033[1;33m//\033[1;36m`:::`\033[1;33m\\ \033[1;33mExploit the Codes \033[1;37m[0x00] " - \033[1;33m// \033[1;36m' \033[1;33m\\ - |/ \033[1;33m\| - -\033[1;36m [1] \033[1;34mShellShock Exploit\n -\033[1;36m [!info] \033[1;34m This module is still being worked on... -''' - diff --git a/core/footprint.py b/core/footprint.py deleted file mode 100644 index 9d2dc422..00000000 --- a/core/footprint.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import os -import time - -def footprint(web): - - from passiveo import * - from activeo import * - from infodisc import * - from footprintban import * - from colors import * - - m = raw_input(O+' [#] \033[1;4mTID\033[0m'+GR+' :> ' + color.END) - print '' - if m == '1': - passiveo(web) - - elif m == '2': - activeo(web) - - elif m == '3': - infodisc(web) - - elif m == '99': - print ' [+] Back!' - - else: - - print '' - dope = [' [*] You high dude?',' [*] Hey there! Enter a valid option',' [*] Whoops! Thats not an option',' [*] Sorry fam! 
You just typed shit'] - print dope[randint(0,3)] - time.sleep(0.5) - footprintban() - footprint(web) diff --git a/core/footprintban.py b/core/footprintban.py deleted file mode 100644 index dff6f881..00000000 --- a/core/footprintban.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import os, time -from colors import * - -def footprintban(): - - time.sleep(0.5) - os.system('clear') - print G+' [!] Module Loaded : Reconnaissance\n' - print """ -\033[1;36m - -\033[1;37m . . - * . . . . * . - . . . . . \033[1;35m ### - \033[1;35mo \033[1;33m-=[ \033[1;31mR E C O N N A I S S A N C E \033[1;33m]=- \033[1;35m > ######-\033[1;37m --0 - + . . . \033[1;35m### - \033[1;35m0\033[37m . . . - . . + , , , - . \033[1;35m\ \033[37m . . + . - . \033[1;35m\ \033[36m . . \033[1;36m###\033[1;37m . - . \033[1;35mo \033[36m . \033[1;33m> \033[1;36m###########- --0\033[1;37m . + - . \033[36m\ \033[1;36m########\033[1;37m . . - \033[34m#\##\##. \033[1;33m> \033[1;36m###########- --0\033[1;37m . . - + \033[34m# #O##\### \033[1;36m###\033[1;37m . + . - . \033[34m#*# #\##\###\033[37m . + . - . \033[34m##*# #\##\##\033[37m + . - . \033[34m##*# #o##\# \033[37m . * , . - . \033[34m**# #\# \033[37m . . . - + \ . \033[33m/\^ \033[37m \033[1;33m.". \033[1;33m/ -\033[1;33m____^/\___^--____/\____\033[1;31mO\033[1;33m_____________/ \/\___________/\/ \______________ - /\^ ^ ^ ^ ^^ ^ '\ ^ ^ --- - -- - -- - - --- __ ^ - -- __ ___-- ^ ^ -- __ - -Choose from the following options: - -\033[1;34m [1] \033[1;36mPassive Footprinting \033[1;0m(Open Source Intelligence) -\033[1;34m [2] \033[1;36mActive Reconnaissance \033[1;0m(Gather via Interaction) -\033[1;34m [3] \033[1;36mInformation Disclosure \033[1;0m(Errors, Emails, etc) - -\033[1;34m [99] \033[1;36mBack -""" diff --git a/core/impo.py b/core/impo.py deleted file mode 100644 index e38cbbe0..00000000 --- a/core/impo.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python2 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import subprocess -from subprocess import call -sys.path.append('core/') - -from inputin import * -from passiveo import * -from passiveban import * -from activeo import * -from activeban import * -from banner import * -from dispmenu import * -from infodisc import * -from agreement import * -from agree import * -from loadstyle import * -from banner1 import * -from webbugs import * -from othbugs import * -from othbugsban import * -from webbugsban import * -from serbugs import * -from serbugsban import * -from auxil import * -from auxilban import * -from colors import * -from exploits import * -from exploitsban import * -from footprintban import * -from footprint import * -from scanenumban import * -from scanenum import * -from vuln import * -from vulnban import * -from tid import * diff --git a/core/infodisc.py b/core/infodisc.py deleted file mode 100644 index f6843ab4..00000000 --- a/core/infodisc.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python2 -# coding:'+B+' utf-8 - -#-:'+B+'-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import os -import time -import subprocess -import random -from 
random import randint -from subprocess import call -sys.path.append('modules/InfoDisc/') - -from credit import * -from emailext import * -from errors import * -from phone import * -from ssn import * -from infodiscban import * -from colors import * -from internalip import * - -def infodisc(web): - - print " [!] Module Selected : Information Disclosure\n\n" - infodiscban() - print '' - time.sleep(0.3) - v = raw_input (''+GR+' [#] \033[1;4mTID\033[0m'+GR+' :> ' + color.END) - print '' - if v == '1': - print C+' [!] Type Selected :'+B+' Credit Card Enumeration' - credit(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - infodisc(web) - - elif v == '2': - print C+' [!] Type Selected :'+B+' Extract All Emails' - emailext(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - infodisc(web) - - elif v == '3': - print C+' [!] Type Selected :'+B+' Enumerate Errors + FPD' - errors(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - infodisc(web) - - elif v == '4': - print C+' [!] Type Selected :'+B+' Internal IP disclosure' - internalip(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - infodisc(web) - - elif v == '5': - print C+' [!] Type Selected :'+B+' Phone Numbers Extract' - phone(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - infodisc(web) - - elif v == '6': - print C+' [!] Type Selected :'+B+' Social Security Numbers' - ssn(web) - print '\n\n' - raw_input(O+' [#] Press '+GR+'Enter'+O+' to continue...') - infodisc(web) - - elif v == 'A': - print C+' [!] Type Selected :'+B+' All Modules' - time.sleep(0.5) - print C+' [*] Firing up module -->'+B+' Credit Cards' - credit(web) - print C+' [!] Module Completed -->'+B+' Credit Cards\n' - - time.sleep(1) - print C+' [*] Firing up module -->'+B+' Email Extraction' - emailext(web) - print C+' [!] Module Completed -->'+B+' Email Hunt\n' - - time.sleep(1) - print C+' [*] Firing up module -->'+B+' Errors Enumeration + FPD' - errors(web) - print C+' [!] Module Completed -->'+B+' Errors Enumeration\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Extract Phone Numbers' - phone(web) - print C+' [!] Module Completed -->'+B+' Extract Phone Numbers\n' - time.sleep(1) - - print C+' [*] Firing up module -->'+B+' Extract Social Security Numbers' - ssn(web) - print C+' [!] Module Completed -->'+B+' Extract SSN\n' - time.sleep(1) - - print C+' [!] All scantypes have been tested on target...' - time.sleep(1) - print C+' [*] Going back to menu...' - infodisc(web) - - elif v == '99': - print C+' [*] Back to the menu !' - time.sleep(0.8) - os.system('clear') - - else: - dope = ['You high dude?','Shit! Enter a valid option','Whoops! Thats not an option','Sorry! 
You just typed shit'] - print dope[randint(0,3)] - time.sleep(0.7) - os.system('clear') - infodisc(web) - diff --git a/core/infodiscban.py b/core/infodiscban.py deleted file mode 100644 index 444907f2..00000000 --- a/core/infodiscban.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -from colors import * -import time - -def infodiscban(): - - time.sleep(0.4) - print G+'\n +-------------------+' - print G+' | '+O+'INFO DISCLOSURE'+G+' |' - print G+' +-------------------+' - time.sleep(0.3) - print '' - print B+' [1]'+C+' Find out Credit Cards '+W+'(if disclosed in plain text)' - time.sleep(0.1) - print B+' [2]'+C+' Extract out all existing emails'+W+' (Absolute)' - time.sleep(0.1) - print B+' [3]'+C+' Enumerate Errors '+W+'(Includes Full Path Disclosure)' - time.sleep(0.1) - print B+' [4]'+C+' Find out any leaks of internal IP addresses' - time.sleep(0.1) - print B+' [5]'+C+' Extract out all Phone Numbers '+W+'(if plaintext disclosure)' - time.sleep(0.1) - print B+' [6]'+C+' Extract out all Social Security Numbers '+W+'(US Based)\n' - time.sleep(0.1) - print B+' [A]'+C+' Start up ALL modules 1 by 1\n' - time.sleep(0.1) - print B+' [99]'+C+' Back\n' - diff --git a/core/initfiles/sample.vaile b/core/initfiles/sample.vaile new file mode 100644 index 00000000..21098bb8 --- /dev/null +++ b/core/initfiles/sample.vaile @@ -0,0 +1,3 @@ +sessions load --val saarsec.val +load pathtrav +attack diff --git a/core/inputin.py b/core/inputin.py deleted file mode 100644 index 4f4b8ac4..00000000 --- a/core/inputin.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python2 -# coding: utf-8 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import sys -import socket -import time -import os -from colors import * - -def inputin(): - - try: - web = raw_input(''+O+' [#] Target web address :> '+C+'') - global web - if 'http' not in str(web): - mo = raw_input(GR+' [#] Does this website use SSL? (y/n) :> ') - if mo == 'y' or mo == 'Y': - web = 'https://'+web - elif mo == 'n': - web = 'http://'+web - if 'http://' in web: - po = web.replace('http://','') - elif 'https://' in web: - po = web.replace('https://','') - if str(web).endswith('/'): - web = po[:-1] - po = po[:-1] - print GR+' [*] Checking server status...' - time.sleep(0.6) - - try: - ip = socket.gethostbyname(po) - print G+' [+] Site seems to be up...' - time.sleep(0.5) - print G+' [+] IP Detected : '+O+ip - time.sleep(0.5) - print '' - os.system('cd tmp/logs/ && rm -rf '+po+'-logs && mkdir '+po+'-logs') - return web - - except socket.gaierror: - print R+' [-] Site seems to be down...' - sys.exit(1) - - except KeyboardInterrupt: - print R+' [-] Exiting...' - time.sleep(0.7) - print C+' [#] Alvida, my friend!' - sys.exit(1) - diff --git a/core/install.py b/core/install.py new file mode 100755 index 00000000..5b40110c --- /dev/null +++ b/core/install.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .; + , ; + . + + . + .;. 
+ .;
+ :
+ ,
+
+
+┌─[TIDoS]─[]
+└──╼ VainlyStrain
+"""
+
+import os
+
+print('''\033[1m
+ ___ _ _ _
+ |_ _| _ _ ___ | |_ __ _ | | | |
+ | | | ' \\ (_-< | _| / _` | | | | |
+ |___| |_||_| /__/_ _\\__| \\__,_| _|_|_ _|_|_
+_|"""""|_|"""""|_|"""""|_|"""""|_|"""""|_|"""""|_|"""""|
+"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'
+''')
+
+
+print(" [+] Installing dependencies (1/2): Package Manager")
+os.system("apt-get install libncurses5 libxml2 nmap tcpdump libexiv2-dev build-essential python3-pip libmariadbclient18 libmysqlclient-dev tor konsole")
+
+print(" [+] Installing dependencies (2/2): pip3")
+os.system("pip3 install -r requirements.txt")
+
+print(" [+] Installing TIDoS...")
+os.system('mkdir -v -p /opt/TIDoS/')
+os.system('cp -r -v ../* /opt/TIDoS/')
+os.system('cp -v ../tmp/tidos /usr/bin/tidos')
+os.system('chmod -R 755 /opt/TIDoS/*')
+os.system('chmod -v 755 /usr/bin/tidos')
+
+print("Installation process complete. Run 'tidos' to launch the framework.\033[0m")
diff --git a/core/lib/FileUtils.py b/core/lib/FileUtils.py new file mode 100644 index 00000000..aa3ed0c0 --- /dev/null +++ b/core/lib/FileUtils.py @@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#-:-:-:-:-:-:-:-:-:-:-:-:#
+# TIDoS Framework #
+#-:-:-:-:-:-:-:-:-:-:-:-:#
+
+#This module requires TIDoS Framework
+#https://github.com/0xInfection/TIDoS-Framework
+
+import os
+import os.path
+
+
+class File(object):
+    def __init__(self, *pathComponents):
+        self._path = FileUtils.buildPath(*pathComponents)
+        # Lazily-populated cache of the file's contents; a private name is
+        # used so the attribute does not shadow the content() method below.
+        self._content = None
+
+    @property
+    def path(self):
+        return self._path
+
+    @path.setter
+    def path(self, value):
+        # The path is immutable once the File object has been created.
+        raise NotImplementedError
+
+    def isValid(self):
+        return FileUtils.isFile(self.path)
+
+    def exists(self):
+        return FileUtils.exists(self.path)
+
+    def canRead(self):
+        return FileUtils.canRead(self.path)
+
+    def canWrite(self):
+        return FileUtils.canWrite(self.path)
+
+    def read(self):
+        return FileUtils.read(self.path)
+
+    def update(self):
+        self._content = self.read()
+
+    def content(self):
+        # Read (and cache) the file content on first access.
+        if not self._content:
+            self._content = FileUtils.read(self.path)
+        return self._content
+
+    def getLines(self):
+        for line in FileUtils.getLines(self.path):
+            yield line
+
+    def __eq__(self, other):
+        # __cmp__/cmp() no longer exist in Python 3; compare contents instead.
+        if not isinstance(other, File):
+            return NotImplemented
+        return self.content() == other.content()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, tb):
+        pass
+
+
+class FileUtils(object):
+    @staticmethod
+    def buildPath(*pathComponents):
+        if pathComponents:
+            path = os.path.join(*pathComponents)
+        else:
+            path = ''
+        return path
+
+    @staticmethod
+    def exists(fileName):
+        return os.access(fileName, os.F_OK)
+
+    @staticmethod
+    def canRead(fileName):
+        if not os.access(fileName, os.R_OK):
+            return False
+        try:
+            with open(fileName):
+                pass
+        except IOError:
+            return False
+        return True
+
+    @staticmethod
+    def canWrite(fileName):
+        return os.access(fileName, os.W_OK)
+
+    @staticmethod
+    def read(fileName):
+        result = ''
+        with open(fileName, 'r') as fd:
+            for line in fd.readlines():
+                result += line
+        return result
+
+    @staticmethod
+    def getLines(fileName):
+        with open(fileName, 'r', errors="replace") as fd:
+            return fd.read().splitlines()
+
+    @staticmethod
+    def isDir(fileName):
+        return os.path.isdir(fileName)
+
+    @staticmethod
+    def isFile(fileName):
+        return os.path.isfile(fileName)
+
+    @staticmethod
+    def createDirectory(directory):
+        if not FileUtils.exists(directory):
+            os.makedirs(directory)
+
+    @staticmethod
+    def sizeHuman(num):
+        base = 1024
+        for x in ['B ', 'KB', 'MB', 'GB']:
+            if num < base and num > -base:
+                return "%3.0f%s" % (num, x)
+            num /= base
+        return "%3.0f %s" % (num, 'TB')
+
+    @staticmethod
+    def writeLines(fileName, lines):
+        if isinstance(lines, list):
+            content = "\n".join(lines)
+        else:
+            content = lines
+        with open(fileName, "w") as f:
+            f.writelines(content)
diff --git a/core/lib/Photon/.travis.yml b/core/lib/Photon/.travis.yml new file mode 100644 index 00000000..b5731398 --- /dev/null +++ b/core/lib/Photon/.travis.yml @@ -0,0 +1,16 @@
+language: python
+os:
+  - linux
+python:
+  - 3.6
+install:
+  - pip install -r requirements.txt
+  - pip install flake8
+before_script:
+  # stop the build if there are Python syntax errors or undefined names
+  - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
+  # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+  - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+script:
+  - python photon.py -u "https://somdev.me/" -l 1 -d 1 -t 100 --regex "\d{10}" --dns --output="d3v"
+  - python photon.py -u "https://somdev.me/" -l 1 -t 10 --seeds="https://somdev.me/posts" --only-urls --export=json --wayback
diff --git a/core/lib/Photon/.whitesource b/core/lib/Photon/.whitesource new file mode 100644 index 00000000..f0569521 --- /dev/null +++ b/core/lib/Photon/.whitesource @@ -0,0 +1,8 @@
+{
+  "generalSettings": {
+    "shouldScanRepo": true
+  },
+  "checkRunSettings": {
+    "vulnerableCheckRunConclusionLevel": "failure"
+  }
+}
\ No newline at end of file
diff --git a/core/lib/Photon/CHANGELOG.md b/core/lib/Photon/CHANGELOG.md new file mode 100644 index 00000000..cc5fdad3 --- /dev/null +++ b/core/lib/Photon/CHANGELOG.md @@ -0,0 +1,121 @@
+#### v1.3.2
+- add support for socks proxies
+- add rotating proxies
+- `-p` now takes `IP:PORT` or `DOMAIN:PORT` with `http://`, `socks5://` or nothing (default is `http`), or a `file` with a list of proxies (`http://`, `socks5://` or nothing); see the examples below.
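+
+  The following forms should all be accepted (an illustrative sketch based on
+  the description above, not taken from the upstream docs; `-u` is the target
+  flag used elsewhere in this repository's CI config):
+
+  ```
+  python photon.py -u "https://example.com/" -p 127.0.0.1:8080
+  python photon.py -u "https://example.com/" -p socks5://127.0.0.1:9050
+  python photon.py -u "https://example.com/" -p proxies.txt
+  ```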
+
+#### v1.3.1
+- Added more intels (GENERIC_URL, BRACKET_URL, BACKSLASH_URL, HEXENCODED_URL, URLENCODED_URL, B64ENCODED_URL, IPV4, IPV6, EMAIL, MD5, SHA1, SHA256, SHA512, YARA_PARSE, CREDIT_CARD)
+- proxy support with `-p, --proxy` option (http proxy only)
+- minor fixes and pep8 format
+
+#### v1.3.0
+- Dropped Python < 3.2 support
+- Removed Ninja mode
+- Fixed a bug in link parsing
+- Fixed Unicode output
+- Fixed a bug which caused URLs to be treated as files
+- Intel is now associated with the URL where it was found
+
+#### v1.2.1
+- Added cloning ability
+- Refactored to be modular
+
+#### v1.1.6
+- Reuse TCP connection for better performance
+- Handle redirect loops
+- CSV export support
+- Fixed `sitemap.xml` parsing
+- Improved regex
+
+#### v1.1.5
+- fixed some minor bugs
+- fixed a bug in domain name parsing
+- added `--headers` option for interactive HTTP headers input
+
+#### v1.1.4
+- Added `-v` option
+- Fixed progress animation for Python 2
+- Added `developer.facebook.com` API for Ninja mode
+
+#### v1.1.3
+- Added `--stdout` option
+- Fixed a bug in `zap()` function
+- Fixed crashing when target is an IP address
+- Minor refactor
+
+#### v1.1.2
+- Added `--wayback`
+- Fixed progress bar for Python > 3.2
+- Added `/core/config.py` for easy customization
+- `--dns` now saves subdomains in `subdomains.txt`
+
+#### v1.1.1
+- Use of `ThreadPoolExecutor` for x2 speed (for Python > 3.2)
+- Fixed mishandling of urls starting with `//`
+- Removed a redundant try-except statement
+- Evaluate entropy of found keys to avoid false positives
+
+#### v1.1.0
+- Added `--keys` option
+- Fixed a bug related to SSL certificate verification
+
+#### v1.0.9
+- Code refactor
+- Better identification of external URLs
+- Fixed a major bug that made several intel URLs pass under the radar
+- Fixed a major bug that caused non-html content to be marked as a crawlable URL
+
+#### v1.0.8
+- added `--exclude` option
+- Better regex and code logic to favor performance
+- Fixed a bug that caused dnsdumpster to fail if target was a subdomain
+- Fixed a bug that caused a crash if run outside the "Photon" directory
+- Fixed a bug in file saving (specific to Python 3)
+
+#### v1.0.7
+- Added `--timeout` option
+- Added `--output` option
+- Added `--user-agent` option
+- Replaced lxml with regex
+- Better logic for favoring performance
+- Added bigger and separate file for user-agents
+
+#### v1.0.6
+- Fixed lots of bugs
+- Suppress SSL warnings on macOS
+- x100 speed by code optimization
+- Simplified code of `exporter` plugin
+
+#### v1.0.5
+- Added `exporter` plugin
+- Added seamless update ability
+- Fixed a bug in update function
+
+#### v1.0.4
+- Fixed an issue which caused regular links to be saved in robots.txt
+- Simplified `flash` function
+- Removed `-n` as an alias of `--ninja`
+- Added `--only-urls` option
+- Refactored code for readability
+- Skip saving files if the content is empty
+
+#### v1.0.3
+- Introduced plugins
+- Added `dnsdumpster` plugin
+- Fixed non-ascii character handling, again
+- 404 pages are now added to `failed` list
+- Handling exceptions in `jscanner`
+
+#### v1.0.2
+- Proper handling of null response from `robots.txt` & `sitemap.xml`
+- Python2 compatibility
+- Proper handling of non-ascii chars
+- Added ability to specify custom regex pattern
+- Display total time taken and average time per request
+
+#### v1.0.1
+- Disabled colors on Windows and macOS
+- Cross platform file handling
+
+#### v1.0.0
+- First stable release
diff --git a/core/lib/Photon/Dockerfile
b/core/lib/Photon/Dockerfile new file mode 100644 index 00000000..4b9fc881 --- /dev/null +++ b/core/lib/Photon/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3-alpine + +LABEL name photon +LABEL src "https://github.com/s0md3v/Photon" +LABEL creator s0md3v +LABEL dockerfile_maintenance khast3x +LABEL desc "Incredibly fast crawler designed for reconnaissance." + +RUN apk add git && git clone https://github.com/s0md3v/Photon.git Photon +WORKDIR Photon +RUN pip install -r requirements.txt + +VOLUME [ "/Photon" ] +# ENTRYPOINT ["sh"] +ENTRYPOINT [ "python", "photon.py" ] +CMD ["--help"] diff --git a/core/lib/Photon/LICENSE.md b/core/lib/Photon/LICENSE.md new file mode 100644 index 00000000..deea74ad --- /dev/null +++ b/core/lib/Photon/LICENSE.md @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. 
The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    {{ project }}  Copyright (C) {{ year }}  {{ organization }}
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
\ No newline at end of file
diff --git a/core/lib/Photon/MANIFEST.in b/core/lib/Photon/MANIFEST.in new file mode 100644 index 00000000..e267daf0 --- /dev/null +++ b/core/lib/Photon/MANIFEST.in @@ -0,0 +1 @@
+include LICENSE.md
diff --git a/core/lib/Photon/README.md b/core/lib/Photon/README.md new file mode 100644 index 00000000..bf1de88d --- /dev/null +++ b/core/lib/Photon/README.md @@ -0,0 +1,108 @@
+

+<!-- Centered README header: Photon logo, the tagline "Incredibly fast
+     crawler designed for OSINT.", a pypi badge, and navigation links:
+     Photon Wiki • How To Use • Compatibility • Photon Library •
+     Contribution • Roadmap -->
+
+![demo](https://image.ibb.co/kQSUcz/demo.png)
+
+### Key Features
+
+#### Data Extraction
+Photon can extract the following data while crawling:
+
+- URLs (in-scope & out-of-scope)
+- URLs with parameters (`example.com/gallery.php?id=2`)
+- Intel (emails, social media accounts, amazon buckets etc.)
+- Files (pdf, png, xml etc.)
+- Secret keys (auth/API keys & hashes)
+- JavaScript files & Endpoints present in them
+- Strings matching custom regex pattern
+- Subdomains & DNS related data
+
+The extracted information is saved in an organized manner or can be [exported as json](https://github.com/s0md3v/Photon/wiki/Usage#export-formatted-result).
+
+![save demo](https://image.ibb.co/dS1BqK/carbon_2.png)
+
+#### Flexible
+Control timeout, delay, add seeds, exclude URLs matching a regex pattern, and other cool stuff.
+The extensive range of [options](https://github.com/s0md3v/Photon/wiki/Usage) provided by Photon lets you crawl the web exactly the way you want.
+
+#### Genius
+Photon's smart thread management & refined logic gives you top-notch performance.
+
+Still, crawling can be resource intensive, but Photon has some tricks up its sleeve. You can fetch URLs archived by [archive.org](https://archive.org/) to be used as seeds with the `--wayback` option.
+
+#### Plugins
+- **[wayback](https://github.com/s0md3v/Photon/wiki/Usage#use-urls-from-archiveorg-as-seeds)**
+- **[dnsdumpster](https://github.com/s0md3v/Photon/wiki/Usage#dumping-dns-data)**
+- **[Exporter](https://github.com/s0md3v/Photon/wiki/Usage#export-formatted-result)**
+
+#### Docker
+
+Photon can be launched using a lightweight Python-Alpine (103 MB) Docker image.
+
+```bash
+$ git clone https://github.com/s0md3v/Photon.git
+$ cd Photon
+$ docker build -t photon .
+$ docker run -it --name photon photon:latest -u google.com
+```
+
+To view the results, you can either head over to the local docker volume, which you can find by running `docker inspect photon`, or mount the target loot folder:
+
+```bash
+$ docker run -it --name photon -v "$PWD:/Photon/google.com" photon:latest -u google.com
+```
+
+#### Frequent & Seamless Updates
+Photon is under heavy development; updates for fixing bugs, optimizing performance & adding new features are rolled out regularly.
+
+If you would like to see the features and issues that are being worked on, have a look at the [Development](https://github.com/s0md3v/Photon/projects/1) project board.
+
+Updates can be installed & checked for with the `--update` option. Photon has seamless update capabilities, which means you can update Photon without losing any of your saved data.
+
+### Contribution & License
+You can contribute in the following ways:
+
+- Report bugs
+- Develop plugins
+- Add more "APIs" for ninja mode
+- Give suggestions to make it better
+- Fix issues & submit a pull request
+
+Please read the [guidelines](https://github.com/s0md3v/Photon/wiki/Guidelines) before submitting a pull request or issue.
+
+Do you want to have a conversation in private?
Hit me up on my [twitter](https://twitter.com/s0md3v/), inbox is open :) + +**Photon** is licensed under [GPL v3.0 license](https://www.gnu.org/licenses/gpl-3.0.en.html) diff --git a/core/lib/Photon/core/__init__.py b/core/lib/Photon/core/__init__.py new file mode 100644 index 00000000..0d75f5e1 --- /dev/null +++ b/core/lib/Photon/core/__init__.py @@ -0,0 +1 @@ +"""The Photon core.""" diff --git a/core/lib/Photon/core/colors.py b/core/lib/Photon/core/colors.py new file mode 100644 index 00000000..f549f64b --- /dev/null +++ b/core/lib/Photon/core/colors.py @@ -0,0 +1,17 @@ +import sys + +if sys.platform.lower().startswith(('os', 'win', 'darwin', 'ios')): + # Colors shouldn't be displayed on Mac and Windows + end = red = white = green = yellow = run = bad = good = info = que = '' +else: + white = '\033[97m' + green = '\033[92m' + red = '\033[91m' + yellow = '\033[93m' + end = '\033[0m' + back = '\033[7;91m' + info = '\033[93m[!]\033[0m' + que = '\033[94m[?]\033[0m' + bad = '\033[91m[-]\033[0m' + good = '\033[92m[+]\033[0m' + run = '\033[97m[~]\033[0m' diff --git a/core/lib/Photon/core/config.py b/core/lib/Photon/core/config.py new file mode 100644 index 00000000..0a54b9ab --- /dev/null +++ b/core/lib/Photon/core/config.py @@ -0,0 +1,27 @@ +"""Configuration options for Photon.""" + +VERBOSE = False + +INTELS = [ + 'facebook.com', + 'github.com', + 'instagram.com', + 'youtube.com', +] + +BAD_TYPES = ( + 'bmp', + 'css', + 'csv', + 'docx', + 'ico', + 'jpeg', + 'jpg', + 'js', + 'json', + 'pdf', + 'png', + 'svg', + 'xls', + 'xml', +) diff --git a/core/lib/Photon/core/flash.py b/core/lib/Photon/core/flash.py new file mode 100644 index 00000000..0741fbba --- /dev/null +++ b/core/lib/Photon/core/flash.py @@ -0,0 +1,20 @@ +# from __future__ import print_function +import concurrent.futures +import sys +import os +from os import path +sys.path.append(os.path.abspath('.')) + +from .colors import info + +def flash(function, links, thread_count): + """Process the URLs and uses a threadpool to execute a function.""" + # Convert links (set) to list + links = list(links) + threadpool = concurrent.futures.ThreadPoolExecutor( + max_workers=thread_count) + futures = (threadpool.submit(function, link) for link in links) + for i, _ in enumerate(concurrent.futures.as_completed(futures)): + if i + 1 == len(links) or (i + 1) % thread_count == 0: + print('%s Progress: %i/%i' % (info, i + 1, len(links))) + print('') diff --git a/core/lib/Photon/core/mirror.py b/core/lib/Photon/core/mirror.py new file mode 100644 index 00000000..8dfebe06 --- /dev/null +++ b/core/lib/Photon/core/mirror.py @@ -0,0 +1,39 @@ +import os + + +def mirror(url, response): + if response != 'dummy': + clean_url = url.replace('http://', '').replace('https://', '').rstrip('/') + parts = clean_url.split('?')[0].split('/') + root = parts[0] + webpage = parts[-1] + parts.remove(root) + try: + parts.remove(webpage) + except ValueError: + pass + prefix = root + '_mirror' + try: + os.mkdir(prefix) + except OSError: + pass + suffix = '' + if parts: + for directory in parts: + suffix += directory + '/' + try: + os.mkdir(prefix + '/' + suffix) + except OSError: + pass + path = prefix + '/' + suffix + trail = '' + if '.' not in webpage: + trail += '.html' + if webpage == root: + name = 'index.html' + else: + name = webpage + if len(url.split('?')) > 1: + trail += '?' 
+ url.split('?')[1]
+        # response is already a str (response.text upstream); calling
+        # .encode() here produced bytes, which a text-mode file rejects.
+        with open(path + name + trail, 'w+') as out_file:
+            out_file.write(response)
diff --git a/core/lib/Photon/core/prompt.py b/core/lib/Photon/core/prompt.py new file mode 100644 index 00000000..0e3e84bf --- /dev/null +++ b/core/lib/Photon/core/prompt.py @@ -0,0 +1,22 @@
+"""Support for an input prompt."""
+import os
+import tempfile
+
+
+def prompt(default=None):
+    """Present the user a prompt."""
+    editor = 'nano'
+    with tempfile.NamedTemporaryFile(mode='r+') as tmpfile:
+        if default:
+            tmpfile.write(default)
+            tmpfile.flush()
+
+        child_pid = os.fork()
+        is_child = child_pid == 0
+
+        if is_child:
+            os.execvp(editor, [editor, tmpfile.name])
+        else:
+            os.waitpid(child_pid, 0)
+            tmpfile.seek(0)
+            return tmpfile.read().strip()
diff --git a/core/lib/Photon/core/regex.py b/core/lib/Photon/core/regex.py new file mode 100644 index 00000000..570742d8 --- /dev/null +++ b/core/lib/Photon/core/regex.py @@ -0,0 +1,234 @@
+import re
+
+# regex taken from https://github.com/InQuest/python-iocextract
+# Reusable end punctuation regex.
+END_PUNCTUATION = r"[\.\?>\"'\)!,}:;\u201d\u2019\uff1e\uff1c\]]*"
+
+# Reusable regex for symbols commonly used to defang.
+SEPARATOR_DEFANGS = r"[\(\)\[\]{}<>\\]"
+
+# Split URLs on some characters that may be valid, but may also be garbage.
+URL_SPLIT_STR = r"[>\"'\),};]"
+
+# Get basic url format, including a few obfuscation techniques, main anchor is the uri scheme.
+GENERIC_URL = re.compile(r"""
+        (
+            # Scheme.
+            [fhstu]\S\S?[px]s?
+            # One of these delimiters/defangs.
+            (?:
+                :\/\/|
+                :\\\\|
+                :?__
+            )
+            # Any number of defang characters.
+            (?:
+                \x20|
+                """ + SEPARATOR_DEFANGS + r"""
+            )*
+            # Domain/path characters.
+            \w
+            \S+?
+            # CISCO ESA style defangs followed by domain/path characters.
+            (?:\x20[\/\.][^\.\/\s]\S*?)*
+        )
+    """ + END_PUNCTUATION + r"""
+    (?=\s|$)
+    """, re.IGNORECASE | re.VERBOSE | re.UNICODE)
+
+# Get some obfuscated urls, main anchor is brackets around the period.
+BRACKET_URL = re.compile(r"""
+        \b
+        (
+            [\.\:\/\\\w\[\]\(\)-]+
+            (?:
+                \x20?
+                [\(\[]
+                \x20?
+                \.
+                \x20?
+                [\]\)]
+                \x20?
+                \S*?
+            )+
+        )
+    """ + END_PUNCTUATION + r"""
+    (?=\s|$)
+    """, re.VERBOSE | re.UNICODE)
+
+# Get some obfuscated urls, main anchor is backslash before a period.
+BACKSLASH_URL = re.compile(r"""
+        \b
+        (
+            [\:\/\\\w\[\]\(\)-]+
+            (?:
+                \x20?
+                \\?\.
+                \x20?
+                \S*?
+            )*?
+            (?:
+                \x20?
+                \\\.
+                \x20?
+                \S*?
+            )
+            (?:
+                \x20?
+                \\?\.
+                \x20?
+                \S*?
+            )*
+        )
+    """ + END_PUNCTUATION + r"""
+    (?=\s|$)
+    """, re.VERBOSE | re.UNICODE)
+
+# Get hex-encoded urls.
+HEXENCODED_URL = re.compile(r"""
+    (
+        [46][86]
+        (?:[57]4)?
+        [57]4[57]0
+        (?:[57]3)?
+        3a2f2f
+        (?:2[356def]|3[0-9adf]|[46][0-9a-f]|[57][0-9af])+
+    )
+    (?:[046]0|2[0-2489a-c]|3[bce]|[57][b-e]|[8-f][0-9a-f]|0a|0d|09|[
+        \x5b-\x5d\x7b\x7d\x0a\x0d\x20
+    ]|$)
+    """, re.IGNORECASE | re.VERBOSE)
+
+# Get urlencoded urls.
+URLENCODED_URL = re.compile(r"""
+    (s?[hf]t?tps?%3A%2F%2F\w[\w%-]*?)(?:[^\w%-]|$)
+    """, re.IGNORECASE | re.VERBOSE)
+
+# Get base64-encoded urls.
+B64ENCODED_URL = re.compile(r"""
+    (
+        # b64re '([hH][tT][tT][pP][sS]|[hH][tT][tT][pP]|[fF][tT][pP])://'
+        # Modified to ignore whitespace.
+# Get base64-encoded urls.
+B64ENCODED_URL = re.compile(r"""
+    (
+        # b64re '([hH][tT][tT][pP][sS]|[hH][tT][tT][pP]|[fF][tT][pP])://'
+        # Modified to ignore whitespace.
+        (?:
+            [\x2b\x2f-\x39A-Za-z]\s*[\x2b\x2f-\x39A-Za-z]\s*[\x31\x35\x39BFJNRVZdhlptx]\s*[Gm]\s*[Vd]\s*[FH]\s*[A]\s*\x36\s*L\s*y\s*[\x2b\x2f\x38-\x39]\s*|
+            [\x2b\x2f-\x39A-Za-z]\s*[\x2b\x2f-\x39A-Za-z]\s*[\x31\x35\x39BFJNRVZdhlptx]\s*[Io]\s*[Vd]\s*[FH]\s*[R]\s*[Qw]\s*[O]\s*i\s*\x38\s*v\s*[\x2b\x2f-\x39A-Za-z]\s*|
+            [\x2b\x2f-\x39A-Za-z]\s*[\x2b\x2f-\x39A-Za-z]\s*[\x31\x35\x39BFJNRVZdhlptx]\s*[Io]\s*[Vd]\s*[FH]\s*[R]\s*[Qw]\s*[Uc]\s*[z]\s*o\s*v\s*L\s*[\x2b\x2f-\x39w-z]\s*|
+            [\x2b\x2f-\x39A-Za-z]\s*[\x30\x32EGUWkm]\s*[Z]\s*[\x30U]\s*[Uc]\s*[D]\s*o\s*v\s*L\s*[\x2b\x2f-\x39w-z]\s*|
+            [\x2b\x2f-\x39A-Za-z]\s*[\x30\x32EGUWkm]\s*[h]\s*[\x30U]\s*[Vd]\s*[FH]\s*[A]\s*\x36\s*L\s*y\s*[\x2b\x2f\x38-\x39]\s*|
+            [\x2b\x2f-\x39A-Za-z]\s*[\x30\x32EGUWkm]\s*[h]\s*[\x30U]\s*[Vd]\s*[FH]\s*[B]\s*[Tz]\s*[O]\s*i\s*\x38\s*v\s*[\x2b\x2f-\x39A-Za-z]\s*|
+            [RZ]\s*[ln]\s*[R]\s*[Qw]\s*[O]\s*i\s*\x38\s*v\s*[\x2b\x2f-\x39A-Za-z]\s*|
+            [Sa]\s*[FH]\s*[R]\s*[\x30U]\s*[Uc]\s*[D]\s*o\s*v\s*L\s*[\x2b\x2f-\x39w-z]\s*|
+            [Sa]\s*[FH]\s*[R]\s*[\x30U]\s*[Uc]\s*[FH]\s*[M]\s*\x36\s*L\s*y\s*[\x2b\x2f\x38-\x39]\s*
+        )
+        # Up to 260 characters (pre-encoding, reasonable URL length).
+        [A-Za-z0-9+/=\s]{1,357}
+    )
+    (?=[^A-Za-z0-9+/=\s]|$)
+    """, re.VERBOSE)
+
+# Get some valid obfuscated ip addresses.
+IPV4 = re.compile(r"""
+    (?:^|
+        (?![^\d\.])
+    )
+    (?:
+        (?:[1-9]?\d|1\d\d|2[0-4]\d|25[0-5])
+        [\[\(\\]*?\.[\]\)]*?
+    ){3}
+    (?:[1-9]?\d|1\d\d|2[0-4]\d|25[0-5])
+    (?:(?=[^\d\.])|$)
+    """, re.VERBOSE)
+
+# Experimental IPv6 regex, will not catch everything but should be sufficient for now.
+IPV6 = re.compile(r"""
+    \b(?:[a-f0-9]{1,4}:|:){2,7}(?:[a-f0-9]{1,4}|:)\b
+    """, re.IGNORECASE | re.VERBOSE)
+
+# Capture email addresses including common defangs.
+EMAIL = re.compile(r"""
+    (
+        [a-z0-9_.+-]+
+        [\(\[{\x20]*
+        (?:@|\Wat\W)
+        [\)\]}\x20]*
+        [a-z0-9-]+
+        (?:
+            (?:
+                (?:
+                    \x20*
+                    """ + SEPARATOR_DEFANGS + r"""
+                    \x20*
+                )*
+                \.
+                (?:
+                    \x20*
+                    """ + SEPARATOR_DEFANGS + r"""
+                    \x20*
+                )*
+                |
+                \W+dot\W+
+            )
+            [a-z0-9-]+?
+        )+
+    )
+    """ + END_PUNCTUATION + r"""
+    (?=\s|$)
+    """, re.IGNORECASE | re.VERBOSE | re.UNICODE)
+
+MD5 = re.compile(r"(?:[^a-fA-F\d]|\b)([a-fA-F\d]{32})(?:[^a-fA-F\d]|\b)")
+SHA1 = re.compile(r"(?:[^a-fA-F\d]|\b)([a-fA-F\d]{40})(?:[^a-fA-F\d]|\b)")
+SHA256 = re.compile(r"(?:[^a-fA-F\d]|\b)([a-fA-F\d]{64})(?:[^a-fA-F\d]|\b)")
+SHA512 = re.compile(
+    r"(?:[^a-fA-F\d]|\b)([a-fA-F\d]{128})(?:[^a-fA-F\d]|\b)")
+
+# YARA regex.
+YARA_PARSE = re.compile(r"""
+    (?:^|\s)
+    (
+        (?:
+            \s*?import\s+?"[^\r\n]*?[\r\n]+|
+            \s*?include\s+?"[^\r\n]*?[\r\n]+|
+            \s*?//[^\r\n]*[\r\n]+|
+            \s*?/\*.*?\*/\s*?
+        )*
+        (?:
+            \s*?private\s+|
+            \s*?global\s+
+        )*
+        rule\s*?
+        \w+\s*?
+        (?:
+            :[\s\w]+
+        )?
+        \s+\{
+        .*?
+        condition\s*?:
+        .*?
+        \s*\}
+    )
+    (?:$|\s)
+    """, re.MULTILINE | re.DOTALL | re.VERBOSE)
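A standalone sanity check for the hash patterns (the sample sentence is invented; the digest is the well-known MD5 of "The quick brown fox jumps over the lazy dog"):

import re

MD5 = re.compile(r"(?:[^a-fA-F\d]|\b)([a-fA-F\d]{32})(?:[^a-fA-F\d]|\b)")

text = 'dropper hash 9e107d9d372bb6826bd81d3542a419d6 seen in the wild'
print(MD5.findall(text))
# -> ['9e107d9d372bb6826bd81d3542a419d6']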
+CREDIT_CARD = re.compile(r"[0-9]{4}[ ]?[-]?[0-9]{4}[ ]?[-]?[0-9]{4}[ ]?[-]?[0-9]{4}")
+
+rintels = [(GENERIC_URL, "GENERIC_URL"),
+           (BRACKET_URL, "BRACKET_URL"),
+           (BACKSLASH_URL, "BACKSLASH_URL"),
+           (HEXENCODED_URL, "HEXENCODED_URL"),
+           (URLENCODED_URL, "URLENCODED_URL"),
+           (B64ENCODED_URL, "B64ENCODED_URL"),
+           (IPV4, "IPV4"),
+           (IPV6, "IPV6"),
+           (EMAIL, "EMAIL"),
+           (MD5, "MD5"),
+           (SHA1, "SHA1"),
+           (SHA256, "SHA256"),
+           (SHA512, "SHA512"),
+           (YARA_PARSE, "YARA_PARSE"),
+           (CREDIT_CARD, "CREDIT_CARD")]
+
+
+rscript = re.compile(r'<(script|SCRIPT).*(src|SRC)=([^\s>]+)')
+rhref = re.compile(r'<[aA].*(href|HREF)=([^\s>]+)')
+rendpoint = re.compile(r'[\'"](/.*?)[\'"]|[\'"](http.*?)[\'"]')
+rentropy = re.compile(r'[\w-]{16,45}')
diff --git a/core/lib/Photon/core/requester.py b/core/lib/Photon/core/requester.py
new file mode 100644
index 00000000..9711c18e
--- /dev/null
+++ b/core/lib/Photon/core/requester.py
@@ -0,0 +1,73 @@
+import random
+import time
+
+import requests
+from requests.exceptions import TooManyRedirects
+
+
+SESSION = requests.Session()
+SESSION.max_redirects = 3
+
+def requester(
+        url,
+        main_url=None,
+        delay=0,
+        cook=None,
+        headers=None,
+        timeout=10,
+        host=None,
+        proxies=[None],
+        user_agents=[None],
+        failed=None,
+        processed=None
+    ):
+    """Handle the requests and return the response body."""
+    cook = cook or set()
+    headers = headers or set()
+    user_agents = user_agents or ['Photon']
+    failed = failed or set()
+    processed = processed or set()
+    # Mark the URL as crawled
+    processed.add(url)
+    # Pause/sleep the program for specified time
+    time.sleep(delay)
+
+    def make_request(url):
+        """Default request"""
+        final_headers = headers or {
+            'Host': host,
+            # Selecting a random user-agent
+            'User-Agent': random.choice(user_agents),
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Accept-Encoding': 'gzip',
+            'DNT': '1',
+            'Connection': 'close',
+        }
+        try:
+            response = SESSION.get(
+                url,
+                cookies=cook,
+                headers=final_headers,
+                verify=False,
+                timeout=timeout,
+                stream=True,
+                proxies=random.choice(proxies)
+            )
+        except TooManyRedirects:
+            return 'dummy'
+
+        # .get() avoids a KeyError when the Content-Type header is missing
+        content_type = response.headers.get('content-type', '')
+        if 'text/html' in content_type or 'text/plain' in content_type:
+            # status_code is an int, so compare against 404, not '404'
+            if response.status_code != 404:
+                return response.text
+            else:
+                response.close()
+                failed.add(url)
+                return 'dummy'
+        else:
+            response.close()
+            return 'dummy'
+
+    return make_request(url)
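A minimal sketch of calling requester directly (the target URL and header values are placeholders; assumes the package is importable from the repo root):

from core.lib.Photon.core.requester import requester

# Returns the body for text/html or text/plain responses that aren't 404s;
# anything else (redirect storms, binary content) comes back as 'dummy'.
body = requester('http://example.com/', host='example.com', user_agents=['Photon'])
print('skipped' if body == 'dummy' else body[:80])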
+ """ + print('%s Checking for updates' % run) + # Changes must be separated by ; + changes = '''major bug fixes;removed ninja mode;dropped python < 3.2 support;fixed unicode output;proxy support;more intels''' + latest_commit = requester('https://raw.githubusercontent.com/s0md3v/Photon/master/core/updater.py', host='raw.githubusercontent.com') + # Just a hack to see if a new version is available + if changes not in latest_commit: + changelog = re.search(r"changes = '''(.*?)'''", latest_commit) + # Splitting the changes to form a list + changelog = changelog.group(1).split(';') + print('%s A new version of Photon is available.' % good) + print('%s Changes:' % info) + for change in changelog: # print changes + print('%s>%s %s' % (green, end, change)) + + current_path = os.getcwd().split('/') # if you know it, you know it + folder = current_path[-1] # current directory name + path = '/'.join(current_path) # current directory path + choice = input('%s Would you like to update? [Y/n] ' % que).lower() + + if choice != 'n': + print('%s Updating Photon' % run) + os.system('git clone --quiet https://github.com/s0md3v/Photon %s' + % (folder)) + os.system('cp -r %s/%s/* %s && rm -r %s/%s/ 2>/dev/null' + % (path, folder, path, path, folder)) + print('%s Update successful!' % good) + else: + print('%s Photon is up to date!' % good) diff --git a/core/lib/Photon/core/user-agents.txt b/core/lib/Photon/core/user-agents.txt new file mode 100644 index 00000000..dc25d833 --- /dev/null +++ b/core/lib/Photon/core/user-agents.txt @@ -0,0 +1,18 @@ +Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 7.02 Bork-edition [en] +Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.0.3705) +Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729) +Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html) +Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0) +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246 +Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; FSL 7.0.7.01001) +Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1 +Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1 +Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.02 +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36 +Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727) +Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0 +Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1 +Mozilla/5.0 (X11; U; Linux x86_64; de; rv:1.9.2.8) Gecko/20100723 Ubuntu/10.04 (lucid) Firefox/3.6.8 +Opera/9.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.01 diff --git a/core/lib/Photon/core/utils.py b/core/lib/Photon/core/utils.py new file mode 100644 index 00000000..3c6a7fe4 --- /dev/null +++ b/core/lib/Photon/core/utils.py @@ -0,0 +1,208 @@ +import requests +import math +import os.path +import re +import argparse +import sys +sys.path.append(os.path.abspath('.')) + +import tld + +from .colors import info +from .config 
+
+from urllib.parse import urlparse
+
+
+def regxy(pattern, response, supress_regex, custom):
+    """Extract a string based on regex pattern supplied by user."""
+    try:
+        matches = re.findall(r'%s' % pattern, response)
+        for match in matches:
+            verb('Custom regex', match)
+            custom.add(match)
+    except re.error:
+        # An invalid user-supplied pattern disables further regex extraction
+        supress_regex = True
+
+
+def is_link(url, processed, files):
+    """
+    Determine whether or not a link should be crawled.
+    A url should not be crawled if it
+        - Is a file
+        - Has already been crawled
+
+    Args:
+        url: str Url to be processed
+        processed: list[str] List of urls that have already been crawled
+
+    Returns:
+        bool If `url` should be crawled
+    """
+    if url not in processed:
+        if url.startswith('#') or url.startswith('javascript:'):
+            return False
+        is_file = url.endswith(BAD_TYPES)
+        if is_file:
+            files.add(url)
+            return False
+        return True
+    return False
+
+
+def remove_regex(urls, regex):
+    """
+    Parse a list for non-matches to a regex.
+
+    Args:
+        urls: iterable of urls
+        regex: string regex to be parsed for
+
+    Returns:
+        list of strings not matching regex
+    """
+
+    if not regex:
+        return urls
+
+    # To avoid iterating over the characters of a string
+    if not isinstance(urls, (list, set, tuple)):
+        urls = [urls]
+
+    try:
+        non_matching_urls = [url for url in urls if not re.search(regex, url)]
+    except TypeError:
+        return []
+
+    return non_matching_urls
+
+
+def writer(datasets, dataset_names, output_dir):
+    """Write the results."""
+    for dataset, dataset_name in zip(datasets, dataset_names):
+        if dataset:
+            filepath = output_dir + '/' + dataset_name + '.txt'
+            with open(filepath, 'w+') as out_file:
+                joined = '\n'.join(dataset)
+                out_file.write(str(joined.encode('utf-8').decode('utf-8')))
+                out_file.write('\n')
+
+
+def timer(diff, processed):
+    """Return the passed time."""
+    # Changes seconds into minutes and seconds
+    minutes, seconds = divmod(diff, 60)
+    try:
+        # Finds average time taken by requests
+        time_per_request = diff / float(len(processed))
+    except ZeroDivisionError:
+        time_per_request = 0
+    return minutes, seconds, time_per_request
+
+
+def entropy(string):
+    """Calculate the entropy of a string."""
+    entropy = 0
+    for number in range(256):
+        # bytes.count() needs an int on Python 3; chr(number) would raise TypeError
+        result = float(string.encode('utf-8').count(
+            number)) / len(string.encode('utf-8'))
+        if result != 0:
+            entropy = entropy - result * math.log(result, 2)
+    return entropy
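entropy above is plain Shannon entropy in bits per byte; photon.py later flags any token scoring at least 4 as a potential secret key. Two hand-checkable values (standalone, assuming the import path below and the tld dependency installed):

from core.lib.Photon.core.utils import entropy

print(entropy('aaaaaaaaaaaaaaaa'))       # 0.0 -- one symbol, zero information
print(entropy('x9fK2qLpW7sRzT4v') >= 4)  # True -- 16 distinct symbols give exactly 4.0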
re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", line): + proxies.append({"http": line, + "https": line}) + else: + print("%s ignored" % line) + if proxies: + return True + return False + + +def proxy_type(v): + """ Match IP:PORT or DOMAIN:PORT in a losse manner """ + proxies = [] + if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v): + proxies.append({"http": v, + "https": v}) + return proxies + elif re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", v): + proxies.append({"http": v, + "https": v}) + return proxies + elif is_proxy_list(v, proxies): + return proxies + else: + raise argparse.ArgumentTypeError( + "Proxy should follow IP:PORT or DOMAIN:PORT format") + + +def luhn(purported): + + # sum_of_digits (index * 2) + LUHN_ODD_LOOKUP = (0, 2, 4, 6, 8, 1, 3, 5, 7, 9) + + if not isinstance(purported, str): + purported = str(purported) + try: + evens = sum(int(p) for p in purported[-1::-2]) + odds = sum(LUHN_ODD_LOOKUP[int(p)] for p in purported[-2::-2]) + return (evens + odds) % 10 == 0 + except ValueError: # Raised if an int conversion fails + return False + + +def is_good_proxy(pip): + try: + requests.get('http://example.com/', proxies=pip, timeout=3) + except requests.exceptions.ConnectTimeout as e: + return False + except Exception as detail: + return False + + return True + diff --git a/core/lib/Photon/core/zap.py b/core/lib/Photon/core/zap.py new file mode 100644 index 00000000..8f1221b2 --- /dev/null +++ b/core/lib/Photon/core/zap.py @@ -0,0 +1,61 @@ +import re +import requests +import random +import sys +import os +from os import path +sys.path.append(os.path.abspath('.')) + +from .utils import verb, xml_parser +from .colors import run, good +from plugins.wayback import time_machine + + +def zap(input_url, archive, domain, host, internal, robots, proxies): + """Extract links from robots.txt and sitemap.xml.""" + if archive: + print('%s Fetching URLs from archive.org' % run) + if False: + archived_urls = time_machine(domain, 'domain') + else: + archived_urls = time_machine(host, 'host') + print('%s Retrieved %i URLs from archive.org' % ( + good, len(archived_urls) - 1)) + for url in archived_urls: + verb('Internal page', url) + internal.add(url) + # Makes request to robots.txt + response = requests.get(input_url + '/robots.txt', + proxies=random.choice(proxies)).text + # Making sure robots.txt isn't some fancy 404 page + if '(?s)', '', response) + res = re.sub(r'<[^<]+?>', '', res) + matches = rintel[0].findall(res) + if matches: + for match in matches: + verb('Intel', match) + bad_intel.add((match, rintel[1], url)) + + +def js_extractor(response): + """Extract js files from the response body""" + # Extract .js files + matches = rscript.findall(response) + for match in matches: + match = match[2].replace('\'', '').replace('"', '') + verb('JS file', match) + bad_scripts.add(match) + +def remove_file(url): + if url.count('/') > 2: + replacable = re.search(r'/[^/]*?$', url).group() + if replacable != '/': + return url.replace(replacable, '') + else: + return url + else: + return url + +def extractor(url): + """Extract details from the response body.""" + response = requester(url, main_url, delay, cook, headers, timeout, host, proxies, user_agents, failed, processed) + if clone: + mirror(url, response) + matches = rhref.findall(response) + for link in matches: + # Remove everything after a "#" to deal with in-page anchors + link = link[1].replace('\'', '').replace('"', 
+
+def extractor(url):
+    """Extract details from the response body."""
+    response = requester(url, main_url, delay, cook, headers, timeout, host, proxies, user_agents, failed, processed)
+    if clone:
+        mirror(url, response)
+    matches = rhref.findall(response)
+    for link in matches:
+        # Remove everything after a "#" to deal with in-page anchors
+        link = link[1].replace('\'', '').replace('"', '').split('#')[0]
+        # Checks if the URLs should be crawled
+        if is_link(link, processed, files):
+            if link[:4] == 'http':
+                if link.startswith(main_url):
+                    verb('Internal page', link)
+                    internal.add(link)
+                else:
+                    verb('External page', link)
+                    external.add(link)
+            elif link[:2] == '//':
+                if link.split('/')[2].startswith(host):
+                    verb('Internal page', link)
+                    internal.add(schema + '://' + link)
+                else:
+                    verb('External page', link)
+                    external.add(link)
+            elif link[:1] == '/':
+                verb('Internal page', link)
+                internal.add(remove_file(url) + link)
+            else:
+                verb('Internal page', link)
+                usable_url = remove_file(url)
+                if usable_url.endswith('/'):
+                    internal.add(usable_url + link)
+                elif link.startswith('/'):
+                    internal.add(usable_url + link)
+                else:
+                    internal.add(usable_url + '/' + link)
+
+    if not only_urls:
+        intel_extractor(url, response)
+        js_extractor(response)
+    if args.regex and not supress_regex:
+        regxy(args.regex, response, supress_regex, custom)
+    if api:
+        matches = rentropy.findall(response)
+        for match in matches:
+            if entropy(match) >= 4:
+                verb('Key', match)
+                keys.add(url + ': ' + match)
+
+
+def jscanner(url):
+    """Extract endpoints from JavaScript code."""
+    response = requester(url, main_url, delay, cook, headers, timeout, host, proxies, user_agents, failed, processed)
+    # Extract URLs/endpoints
+    matches = rendpoint.findall(response)
+    # Iterate over the matches, match is a tuple
+    for match in matches:
+        # Combining the items because one of them is always empty
+        match = match[0] + match[1]
+        # Making sure it's not some JavaScript code
+        if not re.search(r'[}{><"\']', match) and not match == '/':
+            verb('JS endpoint', match)
+            endpoints.add(match)
+
+
+# Records the time at which crawling started
+then = time.time()
+
+# Step 1. Extract urls from robots.txt & sitemap.xml
+zap(main_url, args.archive, domain, host, internal, robots, proxies)
+
+# This is so the level 1 emails are parsed as well
+internal = set(remove_regex(internal, args.exclude))
+
+# Step 2. Crawl recursively to the limit specified in "crawl_level"
+for level in range(crawl_level):
+    # Links to crawl = (all links - already crawled links) - links not to crawl
+    links = remove_regex(internal - processed, args.exclude)
+    # If links to crawl are 0 i.e. all links have been crawled
+    if not links:
+        break
+    # if crawled links are somehow more than all links. Possible? ;/
+    elif len(internal) <= len(processed):
+        if len(internal) > 2 + len(args.seeds):
+            break
+    print('%s Level %i: %i URLs' % (run, level + 1, len(links)))
+    try:
+        flash(extractor, links, thread_count)
+    except KeyboardInterrupt:
+        print('')
+        break
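The loop above is simple set bookkeeping: each level visits internal - processed (minus any exclude regex) and lets extractor grow internal for the next round. The same shape in isolation (toy stand-ins, no network):

internal = {'/a', '/b'}
processed = set()

def fake_extractor(link):
    processed.add(link)
    internal.add(link + '/child')  # pretend every page links one level deeper

for _ in range(2):
    links = internal - processed   # only URLs not yet crawled
    if not links:
        break
    for link in list(links):
        fake_extractor(link)

print(sorted(processed))  # ['/a', '/a/child', '/b', '/b/child']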
+
+if not only_urls:
+    for match in bad_scripts:
+        if match.startswith(main_url):
+            scripts.add(match)
+        elif match.startswith('/') and not match.startswith('//'):
+            scripts.add(main_url + match)
+        elif not match.startswith('http') and not match.startswith('//'):
+            scripts.add(main_url + '/' + match)
+    # Step 3. Scan the JavaScript files for endpoints
+    print('%s Crawling %i JavaScript files' % (run, len(scripts)))
+    flash(jscanner, scripts, thread_count)
+
+    for url in internal:
+        if '=' in url:
+            fuzzable.add(url)
+
+    for match, intel_name, url in bad_intel:
+        if isinstance(match, tuple):
+            for x in match:  # Because "match" is a tuple
+                if x != '':  # If the value isn't empty
+                    if intel_name == "CREDIT_CARD":
+                        # Validate the individual value, not the whole tuple
+                        if not luhn(x):
+                            # garbage number
+                            continue
+                    intel.add("%s:%s" % (intel_name, x))
+        else:
+            if intel_name == "CREDIT_CARD":
+                if not luhn(match):
+                    # garbage number
+                    continue
+            intel.add("%s:%s:%s" % (url, intel_name, match))
+    for url in external:
+        try:
+            if top_level(url, fix_protocol=True) in INTELS:
+                intel.add(url)
+        except Exception:
+            pass
+
+# Records the time at which crawling stopped
+now = time.time()
+# Finds total time taken
+diff = (now - then)
+minutes, seconds, time_per_request = timer(diff, processed)
+
+# Step 4. Save the results
+if not os.path.exists(output_dir):  # if the directory doesn't exist
+    os.mkdir(output_dir)  # create a new directory
+
+datasets = [files, intel, robots, custom, failed, internal, scripts,
+            external, fuzzable, endpoints, keys]
+dataset_names = ['files', 'intel', 'robots', 'custom', 'failed', 'internal',
+                 'scripts', 'external', 'fuzzable', 'endpoints', 'keys']
+
+writer(datasets, dataset_names, output_dir)
+# Printing out results
+print(('%s-%s' % (red, end)) * 50)
+for dataset, dataset_name in zip(datasets, dataset_names):
+    if dataset:
+        print('%s %s: %s' % (good, dataset_name.capitalize(), len(dataset)))
+print(('%s-%s' % (red, end)) * 50)
+
+print('%s Total requests made: %i' % (info, len(processed)))
+print('%s Total time taken: %i minutes %i seconds' % (info, minutes, seconds))
+print('%s Requests per second: %i' % (info, int(len(processed) / diff)))
+
+datasets = {
+    'files': list(files), 'intel': list(intel), 'robots': list(robots),
+    'custom': list(custom), 'failed': list(failed), 'internal': list(internal),
+    'scripts': list(scripts), 'external': list(external),
+    'fuzzable': list(fuzzable), 'endpoints': list(endpoints),
+    'keys': list(keys)
+}
+
+if args.dns:
+    print('%s Enumerating subdomains' % run)
+    from plugins.find_subdomains import find_subdomains
+    subdomains = find_subdomains(domain)
+    print('%s %i subdomains found' % (info, len(subdomains)))
+    writer([subdomains], ['subdomains'], output_dir)
+    datasets['subdomains'] = subdomains
+    from plugins.dnsdumpster import dnsdumpster
+    print('%s Generating DNS map' % run)
+    dnsdumpster(domain, output_dir)
+
+if args.export:
+    from plugins.exporter import exporter
+    # exporter(directory, format, datasets)
+    exporter(output_dir, args.export, datasets)
+
+print('%s Results saved in %s%s%s directory' % (good, green, output_dir, end))
+
+if args.std:
+    for string in datasets[args.std]:
+        sys.stdout.write(string + '\n')
diff --git a/core/lib/Photon/plugins/__init__.py b/core/lib/Photon/plugins/__init__.py
new file mode 100644
index 00000000..2be64901
--- /dev/null
+++ b/core/lib/Photon/plugins/__init__.py
@@ -0,0 +1 @@
+"""Plugins for Photon."""
diff --git a/core/lib/Photon/plugins/dnsdumpster.py b/core/lib/Photon/plugins/dnsdumpster.py
new file mode 100644
index 00000000..29a9614a
--- /dev/null
+++ b/core/lib/Photon/plugins/dnsdumpster.py
@@ -0,0 +1,22 @@
+"""Support for dnsdumpster.com."""
+import re
+
+import requests
+
+
+def dnsdumpster(domain, output_dir):
+    """Query dnsdumpster.com."""
+    response = requests.Session().get('https://dnsdumpster.com/').text
+    csrf_token = 
re.search( + r"name=\"csrfmiddlewaretoken\" value=\"(.*?)\"", response).group(1) + + cookies = {'csrftoken': csrf_token} + headers = {'Referer': 'https://dnsdumpster.com/'} + data = {'csrfmiddlewaretoken': csrf_token, 'targetip': domain} + response = requests.Session().post( + 'https://dnsdumpster.com/', cookies=cookies, data=data, headers=headers) + + image = requests.get('https://dnsdumpster.com/static/map/%s.png' % domain) + if image.status_code == 200: + with open('%s/%s.png' % (output_dir, domain), 'wb') as f: + f.write(image.content) diff --git a/core/lib/Photon/plugins/exporter.py b/core/lib/Photon/plugins/exporter.py new file mode 100644 index 00000000..207cc674 --- /dev/null +++ b/core/lib/Photon/plugins/exporter.py @@ -0,0 +1,24 @@ +"""Support for exporting the results.""" +import csv +import json + + +def exporter(directory, method, datasets): + """Export the results.""" + if method.lower() == 'json': + # Convert json_dict to a JSON styled string + json_string = json.dumps(datasets, indent=4) + savefile = open('{}/exported.json'.format(directory), 'w+') + savefile.write(json_string) + savefile.close() + + if method.lower() == 'csv': + with open('{}/exported.csv'.format(directory), 'w+') as csvfile: + csv_writer = csv.writer( + csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL) + for key, values in datasets.items(): + if values is None: + csv_writer.writerow([key]) + else: + csv_writer.writerow([key] + values) + csvfile.close() diff --git a/core/lib/Photon/plugins/find_subdomains.py b/core/lib/Photon/plugins/find_subdomains.py new file mode 100644 index 00000000..98c9109a --- /dev/null +++ b/core/lib/Photon/plugins/find_subdomains.py @@ -0,0 +1,14 @@ +"""Support for findsubdomains.com.""" +from re import findall + +from requests import get + + +def find_subdomains(domain): + """Find subdomains according to the TLD.""" + result = set() + response = get('https://findsubdomains.com/subdomains-of/' + domain).text + matches = findall(r'(?s)
<div class="domains js-domain-name">(.*?)</div>', response)
+    for match in matches:
+        result.add(match.replace(' ', '').replace('\n', ''))
+    return list(result)
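A hedged usage sketch (placeholder domain; the output naturally depends on whatever findsubdomains.com serves, and the import path assumes the repo root is on sys.path):

from plugins.find_subdomains import find_subdomains

print(find_subdomains('example.com'))
# e.g. ['www.example.com', 'mail.example.com'] -- whitespace already stripped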
diff --git a/core/lib/Photon/plugins/wayback.py b/core/lib/Photon/plugins/wayback.py
new file mode 100644
index 00000000..d15942f6
--- /dev/null
+++ b/core/lib/Photon/plugins/wayback.py
@@ -0,0 +1,23 @@
+"""Support for archive.org."""
+import datetime
+import json
+
+from requests import get
+
+
+def time_machine(host, mode):
+    """Query archive.org."""
+    now = datetime.datetime.now()
+    # archive.org expects yyyyMMdd timestamps, so order year-month-day and zero-pad
+    to = str(now.year) + str(now.month).zfill(2) + str(now.day).zfill(2)
+    if now.month > 6:
+        fro = str(now.year) + str(now.month - 6).zfill(2) + str(now.day).zfill(2)
+    else:
+        fro = str(now.year - 1) + str(now.month + 6).zfill(2) + str(now.day).zfill(2)
+    url = "http://web.archive.org/cdx/search?url=%s&matchType=%s&collapse=urlkey&fl=original&filter=mimetype:text/html&filter=statuscode:200&output=json&from=%s&to=%s" % (host, mode, fro, to)
+    response = get(url).text
+    parsed = json.loads(response)[1:]
+    urls = []
+    for item in parsed:
+        urls.append(item[0])
+    return urls
diff --git a/core/lib/Photon/requirements.txt b/core/lib/Photon/requirements.txt
new file mode 100644
index 00000000..19be6bc4
--- /dev/null
+++ b/core/lib/Photon/requirements.txt
@@ -0,0 +1,4 @@
+requests
+requests[socks]
+urllib3
+tld
\ No newline at end of file
diff --git a/core/lib/__pycache__/FileUtils.cpython-37.pyc b/core/lib/__pycache__/FileUtils.cpython-37.pyc
new file mode 100644
index 00000000..df4f5d53
Binary files /dev/null and b/core/lib/__pycache__/FileUtils.cpython-37.pyc differ
diff --git a/core/lib/dnsdump_mod/DNSDumpsterAPI.py b/core/lib/dnsdump_mod/DNSDumpsterAPI.py
new file mode 100644
index 00000000..685884ff
--- /dev/null
+++ b/core/lib/dnsdump_mod/DNSDumpsterAPI.py
@@ -0,0 +1,118 @@
+"""
+This is the (unofficial) Python API for dnsdumpster.com Website.
+Using this code, you can retrieve subdomains + +""" + + +import requests +import re +import sys + +from bs4 import BeautifulSoup + + +class DNSDumpsterAPI(object): + + """DNSDumpsterAPI Main Handler""" + + def __init__(self, verbose=False): + self.verbose = verbose + + def display_message(self, s): + if self.verbose: + print('[verbose] %s' % s) + + + def retrieve_results(self, table): + res = [] + trs = table.findAll('tr') + for tr in trs: + tds = tr.findAll('td') + pattern_ip = r'([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})' + ip = re.findall(pattern_ip, tds[1].text)[0] + domain = tds[0].text.replace('\n', '').split(' ')[0] + header = ' '.join(tds[0].text.replace('\n', '').split(' ')[1:]) + reverse_dns = tds[1].find('span', attrs={}).text + + additional_info = tds[2].text + country = tds[2].find('span', attrs={}).text + autonomous_system = additional_info.split(' ')[0] + provider = ' '.join(additional_info.split(' ')[1:]) + provider = provider.replace(country, '') + data = {'domain': domain, + 'ip': ip, + 'reverse_dns': reverse_dns, + 'as': autonomous_system, + 'provider': provider, + 'country': country, + 'header': header} + res.append(data) + return res + + def retrieve_txt_record(self, table): + res = [] + for td in table.findAll('td'): + res.append(td.text) + return res + + + def search(self, domain): + dnsdumpster_url = 'https://dnsdumpster.com/' + s = requests.session() + + req = s.get(dnsdumpster_url) + soup = BeautifulSoup(req.content, 'html.parser') + csrf_middleware = soup.findAll('input', attrs={'name': 'csrfmiddlewaretoken'})[0]['value'] + self.display_message('Retrieved token: %s' % csrf_middleware) + + cookies = {'csrftoken': csrf_middleware} + headers = {'Referer': dnsdumpster_url} + data = {'csrfmiddlewaretoken': csrf_middleware, 'targetip': domain} + req = s.post(dnsdumpster_url, cookies=cookies, data=data, headers=headers) + + if req.status_code != 200: + print( + u"Unexpected status code from {url}: {code}".format( + url=dnsdumpster_url, code=req.status_code), + file=sys.stderr, + ) + return [] + + if 'error' in req.content.decode('utf-8'): + print("There was an error getting results", file=sys.stderr) + return [] + + soup = BeautifulSoup(req.content, 'html.parser') + tables = soup.findAll('table') + + res = {} + res['domain'] = domain + res['dns_records'] = {} + res['dns_records']['dns'] = self.retrieve_results(tables[0]) + res['dns_records']['mx'] = self.retrieve_results(tables[1]) + res['dns_records']['txt'] = self.retrieve_txt_record(tables[2]) + res['dns_records']['host'] = self.retrieve_results(tables[3]) + + # Network mapping image + try: + val = soup.find('img', attrs={'class': 'img-responsive'})['src'] + tmp_url = '{}{}'.format(dnsdumpster_url, val) + image_data = requests.get(tmp_url).content.encode('base64') + except: + image_data = None + finally: + res['image_data'] = image_data + + # XLS hosts. + # eg. 
tsebo.com-201606131255.xlsx + try: + pattern = r'https://dnsdumpster.com/static/xls/' + domain + '-[0-9]{12}\.xlsx' + xls_url = re.findall(pattern, req.content)[0] + xls_data = requests.get(xls_url).content.encode('base64') + except: + xls_data = None + finally: + res['xls_data'] = xls_data + + return res diff --git a/core/lib/dnsdump_mod/DNSDumpsterAPI.pyc b/core/lib/dnsdump_mod/DNSDumpsterAPI.pyc new file mode 100644 index 00000000..1ff1af6d Binary files /dev/null and b/core/lib/dnsdump_mod/DNSDumpsterAPI.pyc differ diff --git a/core/lib/dnsdump_mod/__init__.py b/core/lib/dnsdump_mod/__init__.py new file mode 100644 index 00000000..d493f670 --- /dev/null +++ b/core/lib/dnsdump_mod/__init__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# coding: utf-8 + +#-:-:-:-:-:-:-:-:-:-:-:-:# +# TIDoS Framework # +#-:-:-:-:-:-:-:-:-:-:-:-:# + +#This module requires TIDoS Framework +#https://github.com/0xInfection/TIDoS-Framework + +pass diff --git a/core/lib/dnsdump_mod/__init__.pyc b/core/lib/dnsdump_mod/__init__.pyc new file mode 100644 index 00000000..382a88d4 Binary files /dev/null and b/core/lib/dnsdump_mod/__init__.pyc differ diff --git a/core/lib/dnsdump_mod/__pycache__/DNSDumpsterAPI.cpython-37.pyc b/core/lib/dnsdump_mod/__pycache__/DNSDumpsterAPI.cpython-37.pyc new file mode 100644 index 00000000..949722c7 Binary files /dev/null and b/core/lib/dnsdump_mod/__pycache__/DNSDumpsterAPI.cpython-37.pyc differ diff --git a/core/lib/dnsdump_mod/__pycache__/__init__.cpython-37.pyc b/core/lib/dnsdump_mod/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 00000000..57599102 Binary files /dev/null and b/core/lib/dnsdump_mod/__pycache__/__init__.cpython-37.pyc differ diff --git a/lib/emailprotectionslib/build/lib.linux-x86_64-2.7/emailprotectionslib/Resolver.py b/core/lib/emailprotectionslib/Resolver.py similarity index 100% rename from lib/emailprotectionslib/build/lib.linux-x86_64-2.7/emailprotectionslib/Resolver.py rename to core/lib/emailprotectionslib/Resolver.py diff --git a/lib/emailprotectionslib/build/lib.linux-x86_64-2.7/emailprotectionslib/__init__.py b/core/lib/emailprotectionslib/__init__.py similarity index 100% rename from lib/emailprotectionslib/build/lib.linux-x86_64-2.7/emailprotectionslib/__init__.py rename to core/lib/emailprotectionslib/__init__.py diff --git a/core/lib/emailprotectionslib/__pycache__/Resolver.cpython-37.pyc b/core/lib/emailprotectionslib/__pycache__/Resolver.cpython-37.pyc new file mode 100644 index 00000000..734904d8 Binary files /dev/null and b/core/lib/emailprotectionslib/__pycache__/Resolver.cpython-37.pyc differ diff --git a/core/lib/emailprotectionslib/__pycache__/__init__.cpython-37.pyc b/core/lib/emailprotectionslib/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 00000000..66706854 Binary files /dev/null and b/core/lib/emailprotectionslib/__pycache__/__init__.cpython-37.pyc differ diff --git a/core/lib/emailprotectionslib/__pycache__/dmarc.cpython-37.pyc b/core/lib/emailprotectionslib/__pycache__/dmarc.cpython-37.pyc new file mode 100644 index 00000000..b8ee7b5c Binary files /dev/null and b/core/lib/emailprotectionslib/__pycache__/dmarc.cpython-37.pyc differ diff --git a/core/lib/emailprotectionslib/__pycache__/spf.cpython-37.pyc b/core/lib/emailprotectionslib/__pycache__/spf.cpython-37.pyc new file mode 100644 index 00000000..6a3b8de5 Binary files /dev/null and b/core/lib/emailprotectionslib/__pycache__/spf.cpython-37.pyc differ diff --git a/core/lib/emailprotectionslib/dmarc.py b/core/lib/emailprotectionslib/dmarc.py new 
file mode 100644 index 00000000..29a13f94 --- /dev/null +++ b/core/lib/emailprotectionslib/dmarc.py @@ -0,0 +1,150 @@ +import re +import logging +import core.lib.emailprotectionslib.Resolver +import tldextract + + +class DmarcRecord(object): + + def __init__(self, domain): + self.domain = domain + self.version = None + self.policy = None + self.pct = None + self.rua = None + self.ruf = None + self.subdomain_policy = None + self.dkim_alignment = None + self.spf_alignment = None + self.record = None + + def __str__(self): + return self.record + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def _store_tag_data(self, tag_name, tag_value): + if tag_name == "v": + self.version = tag_value + elif tag_name == "p": + self.policy = tag_value + elif tag_name == "pct": + self.pct = tag_value + elif tag_name == "rua": + self.rua = tag_value + elif tag_name == "ruf": + self.ruf = tag_value + elif tag_name == "sp": + self.subdomain_policy = tag_value + elif tag_name == "adkim": + self.dkim_alignment = tag_value + elif tag_name == "aspf": + self.spf_alignment = tag_value + + def process_tags(self, dmarc_string): + TAG_NAME, TAG_VALUE = (0, 1) + tags = _extract_tags(dmarc_string) + for tag in tags: + self._store_tag_data(tag[TAG_NAME], tag[TAG_VALUE]) + + def is_record_strong(self): + record_strong = False + if self.policy is not None and (self.policy == "reject" or self.policy == "quarantine"): + record_strong = True + + if not record_strong: + try: + record_strong = self.is_org_domain_strong() + except OrgDomainException: + record_strong = False + + return record_strong + + def is_subdomain_policy_strong(self): + if self.subdomain_policy is not None: + return self.subdomain_policy == "reject" or self.subdomain_policy == "quarantine" + + def is_org_domain_strong(self): + org_record = self.get_org_record() + subdomain_policy_strong = org_record.is_subdomain_policy_strong() + if subdomain_policy_strong is not None: + return subdomain_policy_strong + else: + return org_record.is_record_strong() + + def get_org_record(self): + org_domain = self.get_org_domain() + if org_domain == self.domain: + raise OrgDomainException + else: + return DmarcRecord.from_domain(org_domain) + + def get_org_domain(self): + try: + domain_parts = tldextract.extract(self.domain) + return "%(domain)s.%(tld)s" % {'domain': domain_parts.domain, 'tld': domain_parts.suffix} + except TypeError: + return None + + @staticmethod + def from_dmarc_string(dmarc_string, domain): + if dmarc_string is not None: + dmarc_record = DmarcRecord(domain) + dmarc_record.record = dmarc_string + dmarc_record.process_tags(dmarc_string) + return dmarc_record + else: + return DmarcRecord(domain) + + @staticmethod + def from_domain(domain): + dmarc_string = get_dmarc_string_for_domain(domain) + if dmarc_string is not None: + return DmarcRecord.from_dmarc_string(dmarc_string, domain) + else: + return DmarcRecord(domain) + + +def _extract_tags(dmarc_record): + dmarc_pattern = "(\w+)=(.*?)(?:; ?|$)" + return re.findall(dmarc_pattern, dmarc_record) + + +def _merge_txt_record_strings(txt_record): + # DMARC spec requires that TXT records containing multiple strings be cat'd together. 
+ string_pattern = re.compile('"([^"]*)"') + txt_record_strings = string_pattern.findall(txt_record) + return "".join(txt_record_strings) + + +def _match_dmarc_record(txt_record): + merged_txt_record = _merge_txt_record_strings(txt_record) + dmarc_pattern = re.compile('^(v=DMARC.*)') + potential_dmarc_match = dmarc_pattern.match(str(merged_txt_record)) + return potential_dmarc_match + + +def _find_record_from_answers(txt_records): + dmarc_record = None + for record in txt_records: + potential_match = _match_dmarc_record(record[2]) + if potential_match is not None: + dmarc_record = potential_match.group(1) + return dmarc_record + + +def get_dmarc_string_for_domain(domain): + try: + txt_records = Resolver.resolver().query("_dmarc." + domain, query_type="TXT") + return _find_record_from_answers(txt_records) + except IOError: + # This is returned usually as a NXDOMAIN, which is expected. + return None + except TypeError as error: + logging.exception(error) + return None + + +class OrgDomainException(Exception): + pass diff --git a/core/lib/emailprotectionslib/spf.py b/core/lib/emailprotectionslib/spf.py new file mode 100644 index 00000000..e2c4d70d --- /dev/null +++ b/core/lib/emailprotectionslib/spf.py @@ -0,0 +1,192 @@ +import re +import logging +import core.lib.emailprotectionslib.Resolver + + +class SpfRecord(object): + + def __init__(self, domain): + self.version = None + self.record = None + self.mechanisms = None + self.all_string = None + self.domain = domain + self.recursion_depth = 0 + + def __str__(self): + return self.record + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def get_redirected_record(self): + if self.recursion_depth >= 10: + return SpfRecord(self.get_redirect_domain()) + else: + redirect_domain = self.get_redirect_domain() + if redirect_domain is not None: + redirect_record = SpfRecord.from_domain(redirect_domain) + redirect_record.recursion_depth = self.recursion_depth + 1 + return redirect_record + + def get_redirect_domain(self): + redirect_domain = None + if self.mechanisms is not None: + for mechanism in self.mechanisms: + redirect_mechanism = re.match('redirect=(.*)', mechanism) + if redirect_mechanism is not None: + redirect_domain = redirect_mechanism.group(1) + return redirect_domain + + def get_include_domains(self): + include_domains = [] + if self.mechanisms is not None: + for mechanism in self.mechanisms: + include_mechanism = re.match('include:(.*)', mechanism) + if include_mechanism is not None: + include_domains.append(include_mechanism.group(1)) + return include_domains + else: + return [] + + def get_include_records(self): + if self.recursion_depth >= 10: + return {} + else: + include_domains = self.get_include_domains() + include_records = {} + for domain in include_domains: + try: + include_records[domain] = SpfRecord.from_domain(domain) + include_records[domain].recursion_depth = self.recursion_depth + 1 + except IOError as e: + logging.exception(e) + include_records[domain] = None + return include_records + + def _is_all_mechanism_strong(self): + strong_spf_all_string = True + if self.all_string is not None: + if not (self.all_string == "~all" or self.all_string == "-all"): + strong_spf_all_string = False + else: + strong_spf_all_string = False + return strong_spf_all_string + + def _is_redirect_mechanism_strong(self): + redirect_domain = self.get_redirect_domain() + + if redirect_domain is not None: + redirect_mechanism = SpfRecord.from_domain(redirect_domain) + + if redirect_mechanism is not None: + return 
redirect_mechanism.is_record_strong() + else: + return False + else: + return False + + def _are_include_mechanisms_strong(self): + include_records = self.get_include_records() + for record in include_records: + if include_records[record] is not None and include_records[record].is_record_strong(): + return True + return False + + def is_record_strong(self): + strong_spf_record = self._is_all_mechanism_strong() + if strong_spf_record is False: + + redirect_strength = self._is_redirect_mechanism_strong() + include_strength = self._are_include_mechanisms_strong() + + strong_spf_record = False + + if redirect_strength is True: + strong_spf_record = True + + if include_strength is True: + strong_spf_record = True + return strong_spf_record + + @staticmethod + def from_spf_string(spf_string, domain): + if spf_string is not None: + spf_record = SpfRecord(domain) + spf_record.record = spf_string + spf_record.mechanisms = _extract_mechanisms(spf_string) + spf_record.version = _extract_version(spf_string) + spf_record.all_string = _extract_all_mechanism(spf_record.mechanisms) + return spf_record + else: + return SpfRecord(domain) + + @staticmethod + def from_domain(domain): + spf_string = get_spf_string_for_domain(domain) + if spf_string is not None: + return SpfRecord.from_spf_string(spf_string, domain) + else: + return SpfRecord(domain) + + +def _extract_version(spf_string): + version_pattern = "^v=(spf.)" + version_match = re.match(version_pattern, spf_string) + if version_match is not None: + return version_match.group(1) + else: + return None + + +def _extract_all_mechanism(mechanisms): + all_mechanism = None + for mechanism in mechanisms: + if re.match(".all", mechanism): + all_mechanism = mechanism + return all_mechanism + + +def _find_unique_mechanisms(initial_mechanisms, redirected_mechanisms): + return [x for x in redirected_mechanisms if x not in initial_mechanisms] + + +def _extract_mechanisms(spf_string): + spf_mechanism_pattern = ("(?:((?:\+|-|~)?(?:a|mx|ptr|include" + "|ip4|ip6|exists|redirect|exp|all)" + "(?:(?::|=|/)?(?:\S*))?) ?)") + spf_mechanisms = re.findall(spf_mechanism_pattern, spf_string) + + return spf_mechanisms + + +def _merge_txt_record_strings(txt_record): + # SPF spec requires that TXT records containing multiple strings be cat'd together. + string_pattern = re.compile('"([^"]*)"') + txt_record_strings = string_pattern.findall(txt_record) + return "".join(txt_record_strings) + + +def _match_spf_record(txt_record): + clean_txt_record = _merge_txt_record_strings(txt_record) + spf_pattern = re.compile('^(v=spf.*)') + potential_spf_match = spf_pattern.match(str(clean_txt_record)) + return potential_spf_match + + +def _find_record_from_answers(txt_records): + spf_record = None + for record in txt_records: + potential_match = _match_spf_record(record[2]) + if potential_match is not None: + spf_record = potential_match.group(1) + return spf_record + + +def get_spf_string_for_domain(domain): + try: + txt_records = Resolver.resolver().query(domain, query_type="TXT") + return _find_record_from_answers(txt_records) + except IOError as e: + # This is returned usually as a NXDOMAIN, which is expected. 
+ return None diff --git a/lib/mechanize/mechanize/__init__.py b/core/lib/mechanize/__init__.py similarity index 100% rename from lib/mechanize/mechanize/__init__.py rename to core/lib/mechanize/__init__.py diff --git a/core/lib/mechanize/__pycache__/__init__.cpython-37.pyc b/core/lib/mechanize/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 00000000..087638ec Binary files /dev/null and b/core/lib/mechanize/__pycache__/__init__.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_auth.cpython-37.pyc b/core/lib/mechanize/__pycache__/_auth.cpython-37.pyc new file mode 100644 index 00000000..a8c0b601 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_auth.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_clientcookie.cpython-37.pyc b/core/lib/mechanize/__pycache__/_clientcookie.cpython-37.pyc new file mode 100644 index 00000000..7d3e703b Binary files /dev/null and b/core/lib/mechanize/__pycache__/_clientcookie.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_debug.cpython-37.pyc b/core/lib/mechanize/__pycache__/_debug.cpython-37.pyc new file mode 100644 index 00000000..c7d9a56d Binary files /dev/null and b/core/lib/mechanize/__pycache__/_debug.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_entities.cpython-37.pyc b/core/lib/mechanize/__pycache__/_entities.cpython-37.pyc new file mode 100644 index 00000000..d424ad30 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_entities.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_equiv.cpython-37.pyc b/core/lib/mechanize/__pycache__/_equiv.cpython-37.pyc new file mode 100644 index 00000000..3ecfd596 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_equiv.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_form.cpython-37.pyc b/core/lib/mechanize/__pycache__/_form.cpython-37.pyc new file mode 100644 index 00000000..0b311a2d Binary files /dev/null and b/core/lib/mechanize/__pycache__/_form.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_form_controls.cpython-37.pyc b/core/lib/mechanize/__pycache__/_form_controls.cpython-37.pyc new file mode 100644 index 00000000..4389b7da Binary files /dev/null and b/core/lib/mechanize/__pycache__/_form_controls.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_gzip.cpython-37.pyc b/core/lib/mechanize/__pycache__/_gzip.cpython-37.pyc new file mode 100644 index 00000000..4bacbfa9 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_gzip.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_headersutil.cpython-37.pyc b/core/lib/mechanize/__pycache__/_headersutil.cpython-37.pyc new file mode 100644 index 00000000..dfd08984 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_headersutil.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_html.cpython-37.pyc b/core/lib/mechanize/__pycache__/_html.cpython-37.pyc new file mode 100644 index 00000000..f17b847f Binary files /dev/null and b/core/lib/mechanize/__pycache__/_html.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_http.cpython-37.pyc b/core/lib/mechanize/__pycache__/_http.cpython-37.pyc new file mode 100644 index 00000000..80f60ed0 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_http.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_mechanize.cpython-37.pyc b/core/lib/mechanize/__pycache__/_mechanize.cpython-37.pyc new file mode 100644 index 00000000..f878d1d6 Binary files /dev/null and 
b/core/lib/mechanize/__pycache__/_mechanize.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_opener.cpython-37.pyc b/core/lib/mechanize/__pycache__/_opener.cpython-37.pyc new file mode 100644 index 00000000..c3e94a7a Binary files /dev/null and b/core/lib/mechanize/__pycache__/_opener.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_request.cpython-37.pyc b/core/lib/mechanize/__pycache__/_request.cpython-37.pyc new file mode 100644 index 00000000..562fd545 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_request.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_response.cpython-37.pyc b/core/lib/mechanize/__pycache__/_response.cpython-37.pyc new file mode 100644 index 00000000..50ddeb47 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_response.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_rfc3986.cpython-37.pyc b/core/lib/mechanize/__pycache__/_rfc3986.cpython-37.pyc new file mode 100644 index 00000000..453d0efb Binary files /dev/null and b/core/lib/mechanize/__pycache__/_rfc3986.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_sockettimeout.cpython-37.pyc b/core/lib/mechanize/__pycache__/_sockettimeout.cpython-37.pyc new file mode 100644 index 00000000..152ad1ce Binary files /dev/null and b/core/lib/mechanize/__pycache__/_sockettimeout.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_urllib2.cpython-37.pyc b/core/lib/mechanize/__pycache__/_urllib2.cpython-37.pyc new file mode 100644 index 00000000..42cbac8f Binary files /dev/null and b/core/lib/mechanize/__pycache__/_urllib2.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_urllib2_fork.cpython-37.pyc b/core/lib/mechanize/__pycache__/_urllib2_fork.cpython-37.pyc new file mode 100644 index 00000000..2fae671f Binary files /dev/null and b/core/lib/mechanize/__pycache__/_urllib2_fork.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_useragent.cpython-37.pyc b/core/lib/mechanize/__pycache__/_useragent.cpython-37.pyc new file mode 100644 index 00000000..69c2f585 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_useragent.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_util.cpython-37.pyc b/core/lib/mechanize/__pycache__/_util.cpython-37.pyc new file mode 100644 index 00000000..d3e82356 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_util.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/_version.cpython-37.pyc b/core/lib/mechanize/__pycache__/_version.cpython-37.pyc new file mode 100644 index 00000000..53ecad65 Binary files /dev/null and b/core/lib/mechanize/__pycache__/_version.cpython-37.pyc differ diff --git a/core/lib/mechanize/__pycache__/polyglot.cpython-37.pyc b/core/lib/mechanize/__pycache__/polyglot.cpython-37.pyc new file mode 100644 index 00000000..231e1191 Binary files /dev/null and b/core/lib/mechanize/__pycache__/polyglot.cpython-37.pyc differ diff --git a/lib/mechanize/mechanize/_auth.py b/core/lib/mechanize/_auth.py similarity index 91% rename from lib/mechanize/mechanize/_auth.py rename to core/lib/mechanize/_auth.py index e8b7a076..023a689b 100644 --- a/lib/mechanize/mechanize/_auth.py +++ b/core/lib/mechanize/_auth.py @@ -4,13 +4,14 @@ Copyright 2006 John J. Lee This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +the terms of the BSD or ZPL 2.1 licenses (see the file LICENSE included with the distribution). 
""" from __future__ import absolute_import from ._urllib2_fork import HTTPPasswordMgr +from .polyglot import is_string, iteritems # TODO: stop deriving from HTTPPasswordMgr @@ -19,7 +20,7 @@ class HTTPProxyPasswordMgr(HTTPPasswordMgr): def add_password(self, realm, uri, user, passwd): # uri could be a single URI or a sequence - if uri is None or isinstance(uri, basestring): + if uri is None or is_string(uri): uris = [uri] else: uris = uri @@ -38,7 +39,7 @@ def find_user_password(self, realm, authuri): authinfo_by_domain = self.passwd.get(realm, {}) for default_port in True, False: reduced_authuri = self.reduce_uri(authuri, default_port) - for uri, authinfo in authinfo_by_domain.iteritems(): + for uri, authinfo in iteritems(authinfo_by_domain): if uri is None and not default_uri: continue if self.is_suburi(uri, reduced_authuri): diff --git a/lib/mechanize/mechanize/_clientcookie.py b/core/lib/mechanize/_clientcookie.py similarity index 92% rename from lib/mechanize/mechanize/_clientcookie.py rename to core/lib/mechanize/_clientcookie.py index a6e427a9..ebd27736 100644 --- a/lib/mechanize/mechanize/_clientcookie.py +++ b/core/lib/mechanize/_clientcookie.py @@ -2,15 +2,12 @@ import re import time -from cookielib import Cookie as _Cookie -from cookielib import CookieJar as CJ -from cookielib import MozillaCookieJar as MCJ -from cookielib import request_host as request_host_lc -from cookielib import (DEFAULT_HTTP_PORT, CookiePolicy, DefaultCookiePolicy, - FileCookieJar, LoadError, LWPCookieJar, _debug, - domain_match, eff_request_host, escape_path, is_HDN, - lwp_cookie_str, reach, request_path, request_port, - user_domain_match) +from .polyglot import ( + Cookie as _Cookie, CookieJar as CJ, MozillaCookieJar as MCJ, request_host + as request_host_lc, DEFAULT_HTTP_PORT, CookiePolicy, DefaultCookiePolicy, + FileCookieJar, LoadError, LWPCookieJar, _debug, domain_match, + eff_request_host, escape_path, is_HDN, lwp_cookie_str, reach, request_path, + request_port, user_domain_match, iteritems) __all__ = [ 'DEFAULT_HTTP_PORT', 'CookiePolicy', 'DefaultCookiePolicy', @@ -60,7 +57,7 @@ def __getstate__(self): return ans def __setstate__(self, val): - for k, v in val.iteritems(): + for k, v in iteritems(val): setattr(self, k, v) def cookies_for_request(self, request): @@ -77,11 +74,10 @@ def cookies_for_request(self, request): cookies = self._cookies_for_request(request) # add cookies in order of most specific (i.e. 
longest) path first + def key(x): + return len(x.path) - def decreasing_size(a, b): - return cmp(len(b.path), len(a.path)) - - cookies.sort(decreasing_size) + cookies.sort(key=key, reverse=True) return cookies def get_policy(self): diff --git a/lib/mechanize/mechanize/_debug.py b/core/lib/mechanize/_debug.py similarity index 100% rename from lib/mechanize/mechanize/_debug.py rename to core/lib/mechanize/_debug.py diff --git a/lib/mechanize/mechanize/_entities.py b/core/lib/mechanize/_entities.py similarity index 99% rename from lib/mechanize/mechanize/_entities.py rename to core/lib/mechanize/_entities.py index c051c811..50a075cc 100644 --- a/lib/mechanize/mechanize/_entities.py +++ b/core/lib/mechanize/_entities.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # vim:fileencoding=utf-8 -# License: BSD Copyright: 2017, Kovid Goyal +# Copyright: 2017, Kovid Goyal from __future__ import (absolute_import, division, print_function, unicode_literals) diff --git a/lib/mechanize/mechanize/_equiv.py b/core/lib/mechanize/_equiv.py similarity index 87% rename from lib/mechanize/mechanize/_equiv.py rename to core/lib/mechanize/_equiv.py index f42a51a2..6c00287a 100644 --- a/lib/mechanize/mechanize/_equiv.py +++ b/core/lib/mechanize/_equiv.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # vim:fileencoding=utf-8 -# License: BSD Copyright: 2017, Kovid Goyal +# Copyright: 2017, Kovid Goyal from __future__ import (absolute_import, division, print_function, unicode_literals) @@ -9,22 +9,22 @@ import string from ._entities import html5_entities +from .polyglot import codepoint_to_chr space_chars = frozenset(("\t", "\n", "\u000C", " ", "\r")) space_chars_bytes = frozenset(item.encode("ascii") for item in space_chars) ascii_letters_bytes = frozenset( item.encode("ascii") for item in string.ascii_letters) -ascii_uppercase_bytes = frozenset( - item.encode("ascii") for item in string.ascii_uppercase) spaces_angle_brackets = space_chars_bytes | frozenset((b">", b"<")) skip1 = space_chars_bytes | frozenset((b"/", )) -head_elems = frozenset(("html", "head", "title", "base", "script", "style", - "meta", "link", "object")) +head_elems = frozenset(( + b"html", b"head", b"title", b"base", b"script", + b"style", b"meta", b"link", b"object")) def my_unichr(num): try: - return unichr(num) + return codepoint_to_chr(num) except (ValueError, OverflowError): return '?' @@ -62,9 +62,6 @@ class Bytes(bytes): If the position is ever greater than the string length then an exception is raised""" - def __new__(self, value): - return bytes.__new__(self, value.lower()) - def __init__(self, value): self._position = -1 @@ -130,6 +127,7 @@ def skip_until(self, chars): return self[pos:p], c p += 1 self._position = p + return b'', b'' def match_bytes(self, bytes): """Look for a sequence of bytes at the start of a string. If the bytes @@ -142,11 +140,21 @@ def match_bytes(self, bytes): self.position += len(bytes) return rv + def match_bytes_pat(self, pat): + bytes = pat.pattern + m = pat.match(self, self.position) + if m is None: + return False + bytes = m.group() + self.position += len(bytes) + return True + def jump_to(self, bytes): """Look for the next sequence of bytes matching a given sequence. 
If a match is found advance the position to the last byte of the match""" - new_pos = self[self.position:].find(bytes) + new_pos = self.find(bytes, max(0, self.position)) if new_pos > -1: + new_pos -= self.position if self._position == -1: self._position = 0 self._position += (new_pos + len(bytes) - 1) @@ -164,16 +172,22 @@ def __init__(self, data): self.headers = [] def __call__(self): - dispatch = ((b" 'type://host/path' - self.__original = unwrap(url) + self.__original = normalize_url(https://github.com/unwrap(url)) self.type = None self._method = method and str(method) # self.__r_type is what's left after doing the splittype @@ -135,14 +197,18 @@ def __init__(self, url, data=None, headers={}, self.port = None self._tunnel_host = None self.data = data - self.headers = {} - for key, value in headers.items(): + self.headers = OrderedDict() + for key, value in iteritems(headers): self.add_header(key, value) - self.unredirected_hdrs = {} + self.unredirected_hdrs = OrderedDict() if origin_req_host is None: origin_req_host = request_host(self) self.origin_req_host = origin_req_host self.unverifiable = unverifiable + try: + self.get_host() # in py3 cookiejar expect self.host to be not None + except Exception: + self.host = None def __getattr__(self, attr): # XXX this is a fallback mechanism to guard against these @@ -180,6 +246,13 @@ def get_data(self): def get_full_url(https://github.com/self): return self.__original + @property + def full_url(https://github.com/self): + # In python 3 this is a deleteable and settable property, which when + # deleted gets set to None. But this interface does not seem to be used + # by any stdlib code, so this should be sufficient. + return self.__original + def get_type(self): if self.type is None: self.type, self.__r_type = splittype(self.__original) @@ -261,14 +334,15 @@ def header_items(self): ''' hdrs = self.unredirected_hdrs.copy() hdrs.update(self.headers) - return list(hdrs.iteritems()) + return list(iteritems(hdrs)) -class OpenerDirector: +class OpenerDirector(object): def __init__(self): client_version = "Python-urllib/%s" % __version__ self.addheaders = [('User-agent', client_version)] + self.finalize_request_headers = None # manage the individual handlers self.handlers = [] self.handle_open = {} @@ -392,21 +466,15 @@ def build_opener(*handlers): If any of the handlers passed as arguments are subclasses of the default handlers, the default handlers will not be used. 
""" - import types - - def isclass(obj): - return isinstance(obj, (types.ClassType, type)) - opener = OpenerDirector() default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, FTPHandler, FileHandler, HTTPErrorProcessor] - if hasattr(httplib, 'HTTPS'): - default_classes.append(HTTPSHandler) + default_classes.append(HTTPSHandler) skip = set() for klass in default_classes: for check in handlers: - if isclass(check): + if is_class(check): if issubclass(check, klass): skip.add(klass) elif isinstance(check, klass): @@ -418,7 +486,7 @@ def isclass(obj): opener.add_handler(klass()) for h in handlers: - if isclass(h): + if is_class(h): h = h() opener.add_handler(h) return opener @@ -435,7 +503,8 @@ def close(self): pass def __lt__(self, other): - return self.handler_order < getattr(other, 'handler_order', sys.maxint) + return self.handler_order < getattr( + other, 'handler_order', sys.maxsize) def __copy__(self): return self.__class__() @@ -674,14 +743,14 @@ def __init__(self, proxies=None, proxy_bypass=None): if proxies is None: proxies = getproxies() - assert hasattr(proxies, 'has_key'), "proxies must be a mapping" + assert is_mapping(proxies), "proxies must be a mapping" self.proxies = proxies - for type, url in proxies.items(): + for type, url in iteritems(proxies): setattr(self, '%s_open' % type, lambda r, proxy=url, type=type, meth=self.proxy_open: meth(r, proxy, type)) if proxy_bypass is None: - proxy_bypass = urllib.proxy_bypass + proxy_bypass = urllib_proxy_bypass self._proxy_bypass = proxy_bypass def proxy_open(self, req, proxy, type): @@ -696,7 +765,11 @@ def proxy_open(self, req, proxy, type): if user and password: user_pass = '%s:%s' % (unquote(user), unquote(password)) + if not isinstance(user_pass, bytes): + user_pass = user_pass.encode('utf-8') creds = base64.b64encode(user_pass).strip() + if isinstance(creds, bytes): + creds = creds.decode('ascii') req.add_header('Proxy-authorization', 'Basic ' + creds) hostport = unquote(hostport) req.set_proxy(hostport, proxy_type) @@ -723,7 +796,7 @@ def __init__(self): def add_password(self, realm, uri, user, passwd): # uri could be a single URI or a sequence - if isinstance(uri, basestring): + if is_string(uri): uri = [uri] if realm not in self.passwd: self.passwd[realm] = {} @@ -736,7 +809,7 @@ def find_user_password(self, realm, authuri): domains = self.passwd.get(realm, {}) for default_port in True, False: reduced_authuri = self.reduce_uri(authuri, default_port) - for uris, authinfo in domains.iteritems(): + for uris, authinfo in iteritems(domains): for uri in uris: if self.is_suburi(uri, reduced_authuri): return authinfo @@ -745,7 +818,7 @@ def find_user_password(self, realm, authuri): def reduce_uri(self, uri, default_port=True): """Accept authority or URI and extract only the authority and path.""" # note HTTP URLs do not have a userinfo component - parts = urlparse.urlsplit(uri) + parts = urlsplit(uri) if parts[1]: # URI scheme = parts[0] @@ -781,7 +854,7 @@ def is_suburi(self, base, test): def __copy__(self): ans = self.__class__() - ans.proxies = copy.deepcopy(self.passwd) + ans.passwd = copy.deepcopy(self.passwd) return ans @@ -831,8 +904,9 @@ def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) if pw is not None: raw = "%s:%s" % (user, pw) - auth = 'Basic %s' % base64.b64encode(raw).strip() - if req.headers.get(self.auth_header, None) == auth: + auth = str('Basic %s' % base64.b64encode( + 
raw.encode('utf-8')).strip().decode('ascii')) + if req.get_header(self.auth_header, None) == auth: return None newreq = copy.copy(req) newreq.add_header(self.auth_header, auth) @@ -875,19 +949,7 @@ def __copy__(self): return AbstractBasicAuthHandler.__copy__(self) -def randombytes(n): - """Return n random bytes.""" - # Use /dev/urandom if it is available. Fall back to random module - # if not. It might be worthwhile to extend this function to use - # other platform-specific mechanisms for getting random bytes. - if os.path.exists("/dev/urandom"): - f = open("/dev/urandom") - s = f.read(n) - f.close() - return s - else: - L = [chr(random.randrange(0, 256)) for i in range(n)] - return "".join(L) +randombytes = os.urandom class AbstractDigestAuthHandler: @@ -936,7 +998,7 @@ def retry_http_digest_auth(self, req, auth): auth = self.get_authorization(req, chal) if auth: auth_val = 'Digest %s' % auth - if req.headers.get(self.auth_header, None) == auth_val: + if req.get_header(self.auth_header, None) == auth_val: return None newreq = copy.copy(req) newreq.add_unredirected_header(self.auth_header, auth_val) @@ -1047,7 +1109,7 @@ class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): handler_order = 490 # before Basic auth def http_error_401(self, req, fp, code, msg, headers): - host = urlparse.urlparse(req.get_full_url())[1] + host = urlparse(req.get_full_url())[1] retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) self.reset_retry_count() @@ -1116,7 +1178,7 @@ def do_open(self, http_class, req): http_class must implement the HTTPConnection API from httplib. The addinfourl return value is a file-like object. It also has methods and attributes including: - - info(): return a mimetools.Message object for the headers + - info(): return a HTTPMessage object for the headers - geturl(): return the original request URL - code: HTTP status code """ @@ -1127,31 +1189,42 @@ def do_open(self, http_class, req): h = http_class(host_port, timeout=req.timeout) h.set_debuglevel(self._debuglevel) - headers = dict(req.headers) - headers.update(req.unredirected_hdrs) + headers = OrderedDict(req.headers) + for key, val in iteritems(req.unredirected_hdrs): + headers[key] = val # We want to make an HTTP/1.1 request, but the addinfourl # class isn't prepared to deal with a persistent connection. # It will try to read all remaining data from the socket, # which will block while the server waits for the next request. # So make sure the connection gets closed after the (only) # request. - headers[b"Connection"] = b"close" + headers["Connection"] = "close" # httplib in python 2 needs str() not unicode() for all request # parameters - headers = {str(name.title()): str(val) - for name, val in headers.items()} + if is_py2: + headers = OrderedDict( + (str(name.title()), str(val)) + for name, val in iteritems(headers)) + else: + headers = OrderedDict( + (as_unicode(name, 'iso-8859-1').title(), + as_unicode(val, 'iso-8859-1')) + for name, val in iteritems(headers)) if req._tunnel_host: set_tunnel = h.set_tunnel if hasattr( h, "set_tunnel") else h._set_tunnel tunnel_headers = {} - proxy_auth_hdr = b"Proxy-Authorization" + proxy_auth_hdr = "Proxy-Authorization" if proxy_auth_hdr in headers: tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] # Proxy-Authorization should not be sent to origin server. 
del headers[proxy_auth_hdr] set_tunnel(req._tunnel_host, headers=tunnel_headers) + if self.parent.finalize_request_headers is not None: + self.parent.finalize_request_headers(req, headers) + try: h.request(str(req.get_method()), str(req.get_selector()), req.data, headers) @@ -1161,16 +1234,6 @@ def do_open(self, http_class, req): # Pick apart the HTTPResponse object to get the addinfourl # object initialized properly. - - # Wrap the HTTPResponse object in socket's file object adapter - # for Windows. That adapter calls recv(), so delegate recv() - # to read(). This weird wrapping allows the returned object to - # have readline() and readlines() methods. - - # XXX It might be better to extract the read buffering code - # out of socket._fileobject() and into a base class. - - r.recv = r.read fp = create_readline_wrapper(r) resp = closeable_response( @@ -1185,42 +1248,40 @@ def __copy__(self): class HTTPHandler(AbstractHTTPHandler): def http_open(self, req): - return self.do_open(httplib.HTTPConnection, req) + return self.do_open(HTTPConnection, req) http_request = AbstractHTTPHandler.do_request_ -if hasattr(httplib, 'HTTPS'): +class HTTPSHandler(AbstractHTTPHandler): - class HTTPSHandler(AbstractHTTPHandler): + def __init__(self, client_cert_manager=None): + AbstractHTTPHandler.__init__(self) + self.client_cert_manager = client_cert_manager + self.ssl_context = None - def __init__(self, client_cert_manager=None): - AbstractHTTPHandler.__init__(self) - self.client_cert_manager = client_cert_manager - self.ssl_context = None - - def https_open(self, req): - key_file = cert_file = None - if self.client_cert_manager is not None: - key_file, cert_file = self.client_cert_manager.find_key_cert( - req.get_full_url()) - if self.ssl_context is None: - conn_factory = partial( - httplib.HTTPSConnection, key_file=key_file, - cert_file=cert_file) - else: - conn_factory = partial( - httplib.HTTPSConnection, key_file=key_file, - cert_file=cert_file, context=self.ssl_context) - return self.do_open(conn_factory, req) + def https_open(self, req): + key_file = cert_file = None + if self.client_cert_manager is not None: + key_file, cert_file = self.client_cert_manager.find_key_cert( + req.get_full_url()) + if self.ssl_context is None: + conn_factory = partial( + HTTPSConnection, key_file=key_file, + cert_file=cert_file) + else: + conn_factory = partial( + HTTPSConnection, key_file=key_file, + cert_file=cert_file, context=self.ssl_context) + return self.do_open(conn_factory, req) - https_request = AbstractHTTPHandler.do_request_ + https_request = AbstractHTTPHandler.do_request_ - def __copy__(self): - ans = self.__class__(self.client_cert_manager) - ans._debuglevel = self._debuglevel - ans.ssl_context = self.ssl_context - return ans + def __copy__(self): + ans = self.__class__(self.client_cert_manager) + ans._debuglevel = self._debuglevel + ans.ssl_context = self.ssl_context + return ans class HTTPCookieProcessor(BaseHandler): @@ -1259,12 +1320,12 @@ def unknown_open(self, req): raise URLError('unknown url type: %s' % type) -def parse_keqv_list(l): +def parse_keqv_list(ln): """Parse list of key=value strings where keys are not duplicated.""" parsed = {} - for elt in l: + for elt in ln: k, v = elt.split('=', 1) - if v[0] == '"' and v[-1] == '"': + if v[0:1] == '"' and v[-1:] == '"': v = v[1:-1] parsed[k] = v return parsed @@ -1311,7 +1372,7 @@ def parse_http_list(s): if part: res.append(part) - return [part_.strip() for part_ in res] + return list(filter(None, (part_.strip() for part_ in res))) class 
FileHandler(BaseHandler): @@ -1340,11 +1401,7 @@ def get_names(self): # not entirely sure what the rules are here def open_local_file(self, req): - try: - import email.utils as emailutils - except ImportError: - # python 2.4 - import email.Utils as emailutils + import email.utils as emailutils import mimetypes host = req.get_host() file = req.get_selector() @@ -1358,16 +1415,17 @@ def open_local_file(self, req): size = stats.st_size modified = emailutils.formatdate(stats.st_mtime, usegmt=True) mtype = mimetypes.guess_type(file)[0] - headers = mimetools.Message(StringIO( - 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) + headers = create_response_info(BytesIO( + ('Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified)).encode( + 'iso-8859-1'))) if host: host, port = splitport(host) if not host or ( not port and socket.gethostbyname(host) in self.get_names() ): - return addinfourl(https://github.com/open(localfile,%20'rb'), - headers, 'file:' + file) + fp = open(localfile, 'rb') + return closeable_response(fp, headers, 'file:' + file) except OSError as msg: # urllib2 users shouldn't expect OSErrors coming from urlopen() raise URLError(msg) @@ -1404,7 +1462,7 @@ def ftp_open(self, req): raise URLError(msg) path, attrs = splitattr(req.get_selector()) dirs = path.split('/') - dirs = map(unquote, dirs) + dirs = list(map(unquote, dirs)) dirs, file = dirs[:-1], dirs[-1] if dirs and not dirs[0]: dirs = dirs[1:] @@ -1423,11 +1481,11 @@ def ftp_open(self, req): headers += "Content-type: %s\n" % mtype if retrlen is not None and retrlen >= 0: headers += "Content-length: %d\n" % retrlen - sf = StringIO(headers) - headers = mimetools.Message(sf) - return addinfourl(https://github.com/fp,%20headers,%20req.get_full_url()) + sf = BytesIO(headers.encode('iso-8859-1')) + headers = create_response_info(sf) + return closeable_response(fp, headers, req.get_full_url()) except ftplib.all_errors as msg: - raise URLError('ftp error: %s' % msg), None, sys.exc_info()[2] + raise_with_traceback(URLError('ftp error: %s' % msg)) def connect_ftp(self, user, passwd, host, port, dirs, timeout): try: @@ -1471,7 +1529,7 @@ def check_cache(self): # first check for old ones t = time.time() if self.soonest <= t: - for k, v in self.timeout.items(): + for k, v in iteritems(self.timeout): if v < t: self.cache[k].close() del self.cache[k] @@ -1480,7 +1538,7 @@ def check_cache(self): # then check the size if len(self.cache) == self.max_conns: - for k, v in self.timeout.items(): + for k, v in iteritems(self.timeout): if v == self.soonest: del self.cache[k] del self.timeout[k] diff --git a/lib/mechanize/mechanize/_useragent.py b/core/lib/mechanize/_useragent.py similarity index 97% rename from lib/mechanize/mechanize/_useragent.py rename to core/lib/mechanize/_useragent.py index 7e2a0efc..0375b6d3 100644 --- a/lib/mechanize/mechanize/_useragent.py +++ b/core/lib/mechanize/_useragent.py @@ -6,7 +6,7 @@ Copyright 2003-2006 John J. Lee This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +the terms of the BSD or ZPL 2.1 licenses (see the file LICENSE included with the distribution). """ @@ -16,6 +16,7 @@ import copy from . 
import _auth, _gzip, _opener, _response, _sockettimeout, _urllib2 +from .polyglot import iteritems, itervalues class UserAgentBase(_opener.OpenerDirector): @@ -100,7 +101,7 @@ def __init__(self): self.default_features): klass = self.handler_classes[scheme] ua_handlers[scheme] = klass() - for handler in ua_handlers.itervalues(): + for handler in tuple(itervalues(ua_handlers)): self.add_handler(handler) # Yuck. @@ -155,7 +156,7 @@ def set_handled_schemes(self, schemes): want[scheme] = None # get rid of scheme handlers we don't want - for scheme, oldhandler in self._ua_handlers.items(): + for scheme, oldhandler in tuple(iteritems(self._ua_handlers)): if scheme.startswith("_"): continue # not a scheme handler if scheme not in want: @@ -163,7 +164,7 @@ def set_handled_schemes(self, schemes): else: del want[scheme] # already got it # add the scheme handlers that are missing - for scheme in want.keys(): + for scheme in want: self._set_handler(scheme, True) def set_cookiejar(self, cookiejar): @@ -324,8 +325,8 @@ def set_debug_redirects(self, handle): Other logger names relevant to this module: - `"mechanize.http_responses"` - `"mechanize.cookies"` + * `mechanize.http_responses` + * `mechanize.cookies` To turn on everything: @@ -362,7 +363,7 @@ def _copy_state(self, other): if self._ua_handlers is None: raise ValueError('Cannot copy state from a closed UserAgentBase') other.addheaders = self.addheaders[:] - rmap = {v: k for k, v in self._ua_handlers.iteritems()} + rmap = {v: k for k, v in iteritems(self._ua_handlers)} def clone_handler(h): ans = copy.copy(h) diff --git a/lib/mechanize/mechanize/_util.py b/core/lib/mechanize/_util.py similarity index 96% rename from lib/mechanize/mechanize/_util.py rename to core/lib/mechanize/_util.py index ace2175a..61a02ee9 100644 --- a/lib/mechanize/mechanize/_util.py +++ b/core/lib/mechanize/_util.py @@ -4,7 +4,7 @@ This code is free software; you can redistribute it and/or modify it under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). +LICENSE included with the distribution). """ from __future__ import absolute_import @@ -44,11 +44,8 @@ def reset_deprecations(): def read_file(filename): - fh = open(filename) - try: - return fh.read() - finally: - fh.close() + with open(filename, 'rb') as f: + return f.read() def write_file(filename, data): @@ -67,22 +64,14 @@ def get1(sequence): def isstringlike(x): try: x + "" - except: + except Exception: return False else: return True -# def caller(): -# try: -## raise SyntaxError -# except: -## import sys -# return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name - - - # Date/time conversion routines for formats used by the HTTP protocol. + EPOCH = 1970 @@ -94,6 +83,7 @@ def my_timegm(tt): else: return None + days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] @@ -299,7 +289,7 @@ def http2time(text): iso_re = re.compile( - """^ + r"""^ (\d{4}) # year [-\/]? (\d\d?) 
# numerical month diff --git a/core/lib/mechanize/_version.py b/core/lib/mechanize/_version.py new file mode 100644 index 00000000..71edbedf --- /dev/null +++ b/core/lib/mechanize/_version.py @@ -0,0 +1,2 @@ +"0.4.4" +__version__ = (0, 4, 4, None, None) diff --git a/core/lib/mechanize/polyglot.py b/core/lib/mechanize/polyglot.py new file mode 100644 index 00000000..b57624d7 --- /dev/null +++ b/core/lib/mechanize/polyglot.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +# Copyright: 2018, Kovid Goyal + +from __future__ import (absolute_import, division, print_function, + unicode_literals) + +import sys +import collections + +is_py2 = sys.version_info.major < 3 + +if is_py2: + import types + from urllib import ( + urlencode, pathname2url, quote, addinfourl, quote_plus, + urlopen, splitattr, splithost as urllib_splithost, getproxies, + ftpwrapper, proxy_bypass as urllib_proxy_bypass, splitpasswd, + splitport, splittype, splituser, splitvalue, + unquote, unwrap, url2pathname + ) + from urllib2 import ( + HTTPError, URLError, install_opener, build_opener, ProxyHandler + ) + from robotparser import RobotFileParser + from urlparse import urlsplit, urljoin, urlparse, urlunparse + from httplib import HTTPMessage, HTTPConnection, HTTPSConnection + from cookielib import ( + DEFAULT_HTTP_PORT, CookiePolicy, DefaultCookiePolicy, + FileCookieJar, LoadError, LWPCookieJar, _debug, domain_match, + eff_request_host, escape_path, is_HDN, lwp_cookie_str, reach, + request_path, request_port, user_domain_match, Cookie, CookieJar, + MozillaCookieJar, request_host) + from cStringIO import StringIO + from future_builtins import map # noqa + + def is_string(x): + return isinstance(x, basestring) + + def iteritems(x): + return x.iteritems() + + def itervalues(x): + return x.itervalues() + + def is_class(obj): + return isinstance(obj, (types.ClassType, type)) + + def raise_with_traceback(exc): + exec('raise exc, None, sys.exc_info()[2]') + + def is_mapping(x): + return isinstance(x, collections.Mapping) + + codepoint_to_chr = unichr + unicode_type = unicode + create_response_info = HTTPMessage + + +else: + from urllib.error import HTTPError, URLError + from urllib.robotparser import RobotFileParser + from urllib.parse import ( + urlsplit, urljoin, urlparse, urlunparse, + urlencode, quote_plus, splitattr, splithost as urllib_splithost, + splitpasswd, splitport, splittype, splituser, splitvalue, + unquote, unwrap + ) + from urllib.request import ( + pathname2url, quote, addinfourl, install_opener, build_opener, + ProxyHandler, urlopen as _urlopen, getproxies, ftpwrapper, + proxy_bypass as urllib_proxy_bypass, url2pathname, Request) + from http.client import ( + HTTPMessage, parse_headers, HTTPConnection, + HTTPSConnection) + from http.cookiejar import ( + DEFAULT_HTTP_PORT, CookiePolicy, DefaultCookiePolicy, + FileCookieJar, LoadError, LWPCookieJar, _debug, domain_match, + eff_request_host, escape_path, is_HDN, lwp_cookie_str, reach, + request_path, request_port, user_domain_match, Cookie, CookieJar, + MozillaCookieJar, request_host) + from io import StringIO + + def is_string(x): + return isinstance(x, str) + + def iteritems(x): + return x.items() + + def itervalues(x): + return x.values() + + def is_class(obj): + return isinstance(obj, type) + + def raise_with_traceback(exc): + raise exc.with_traceback(sys.exc_info()[2]) + + codepoint_to_chr = chr + unicode_type = str + map = map + + # Legacy code expects HTTPMessage.getheaders() + def getheaders(self, name): + return self.get_all(name, 
failobj=[]) + HTTPMessage.getheaders = getheaders + + # We want __getitem__ to return the last header not the first + def getitem(self, name): + vals = self.get_all(name, [None]) + return vals[-1] + HTTPMessage.__getitem__ = getitem + + # Legacy method names + HTTPMessage.gettype = HTTPMessage.get_content_type + HTTPMessage.getmainttype = HTTPMessage.get_content_maintype + HTTPMessage.getsubtype = HTTPMessage.get_content_subtype + + def is_mapping(x): + return isinstance(x, collections.abc.Mapping) + + def create_response_info(fp): + return parse_headers(fp) + + def urlopen(*a, **kw): + proxies = kw.pop('proxies', None) + if proxies is None: + return _urlopen(*a, **kw) + r = Request(a[0]) + for k, v in proxies.items(): + r.set_proxy(v, k) + return _urlopen(r, *a[1:], **kw) + + +def as_unicode(x, encoding='utf-8'): + if isinstance(x, bytes): + x = x.decode('utf-8') + return x + + +if False: + (HTTPError, urlsplit, urljoin, urlparse, urlunparse, urlencode, + HTTPMessage, splitattr, urllib_splithost, getproxies, ftpwrapper, + urllib_proxy_bypass, splituser, splitpasswd, splitport, + splitvalue, splittype, unquote, unwrap, url2pathname) + pathname2url, RobotFileParser, URLError, quote, HTTPConnection + HTTPSConnection, StringIO, addinfourl, install_opener, build_opener + ProxyHandler, quote_plus, urlopen + (DEFAULT_HTTP_PORT, CookiePolicy, DefaultCookiePolicy, + FileCookieJar, LoadError, LWPCookieJar, _debug, + domain_match, eff_request_host, escape_path, is_HDN, + lwp_cookie_str, reach, request_path, request_port, + user_domain_match, Cookie, CookieJar, MozillaCookieJar, request_host) diff --git a/core/loadstyle.py b/core/loadstyle.py deleted file mode 100644 index 30901322..00000000 --- a/core/loadstyle.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python2 - -#-:-:-:-:-:-:-:-:-:-:-:-:# -# TIDoS Framework # -#-:-:-:-:-:-:-:-:-:-:-:-:# - -#This module requires TIDoS Framework -#https://github.com/theInfectedDrake/TIDoS-Framework - -import os -import time -from colors import * - -def loadstyle(): - - os.system('clear') - red_bold = R - reset = W - loading = "Loading the TIDoS Framework..." 
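# Two editorial notes on the polyglot layer above. First, as_unicode()
# accepts an encoding argument but decodes with a hard-coded 'utf-8', so the
# parameter is currently ignored. Second, a usage sketch (illustrative only)
# of how the shims keep one source tree valid on both interpreter majors:
import sys

if sys.version_info.major < 3:
    def iteritems(x):
        return x.iteritems()   # py2: no intermediate list
else:
    def iteritems(x):
        return x.items()       # py3: items() is already a view

proxies = {'http': 'http://proxy:3128/', 'https': 'http://proxy:3128/'}
for scheme, url in iteritems(proxies):
    print('%s -> %s' % (scheme, url))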
- action = 0 - while action < 1: - for i,char in enumerate(loading): - if i == 0: - print "%s%s%s%s" %(red_bold,char.upper(),reset,loading[1:]) - elif i == 1: - old_loading = loading[0].lower() - print "%s%s%s%s%s" %(old_loading,red_bold,char.upper(),reset,loading[2:]) - elif i == i: - old_loading = loading[-0:i].lower() - print "%s%s%s%s%s" %(old_loading,red_bold,char.upper(),reset,loading[i+1:]) - time.sleep(0.1) - os.system('clear') - action += 1 - diff --git a/core/loot/.directory b/core/loot/.directory new file mode 100644 index 00000000..64c8afa2 --- /dev/null +++ b/core/loot/.directory @@ -0,0 +1,3 @@ +[Dolphin] +Timestamp=2019,7,3,12,50,49 +Version=4 diff --git a/core/methods/__pycache__/cache.cpython-37.pyc b/core/methods/__pycache__/cache.cpython-37.pyc new file mode 100644 index 00000000..d913f497 Binary files /dev/null and b/core/methods/__pycache__/cache.cpython-37.pyc differ diff --git a/core/methods/__pycache__/cache.cpython-38.pyc b/core/methods/__pycache__/cache.cpython-38.pyc new file mode 100644 index 00000000..a43f4c9e Binary files /dev/null and b/core/methods/__pycache__/cache.cpython-38.pyc differ diff --git a/core/methods/__pycache__/creds.cpython-37.pyc b/core/methods/__pycache__/creds.cpython-37.pyc new file mode 100644 index 00000000..85916a0e Binary files /dev/null and b/core/methods/__pycache__/creds.cpython-37.pyc differ diff --git a/core/methods/__pycache__/creds.cpython-38.pyc b/core/methods/__pycache__/creds.cpython-38.pyc new file mode 100644 index 00000000..ed028d6d Binary files /dev/null and b/core/methods/__pycache__/creds.cpython-38.pyc differ diff --git a/core/methods/__pycache__/fetch.cpython-38.pyc b/core/methods/__pycache__/fetch.cpython-38.pyc new file mode 100644 index 00000000..ec709b42 Binary files /dev/null and b/core/methods/__pycache__/fetch.cpython-38.pyc differ diff --git a/core/methods/__pycache__/inputin.cpython-37.pyc b/core/methods/__pycache__/inputin.cpython-37.pyc new file mode 100644 index 00000000..57ad8362 Binary files /dev/null and b/core/methods/__pycache__/inputin.cpython-37.pyc differ diff --git a/core/methods/__pycache__/inputin.cpython-38.pyc b/core/methods/__pycache__/inputin.cpython-38.pyc new file mode 100644 index 00000000..f310e43d Binary files /dev/null and b/core/methods/__pycache__/inputin.cpython-38.pyc differ diff --git a/core/methods/__pycache__/multiproc.cpython-37.pyc b/core/methods/__pycache__/multiproc.cpython-37.pyc new file mode 100644 index 00000000..16ce1db6 Binary files /dev/null and b/core/methods/__pycache__/multiproc.cpython-37.pyc differ diff --git a/core/methods/__pycache__/multiproc.cpython-38.pyc b/core/methods/__pycache__/multiproc.cpython-38.pyc new file mode 100644 index 00000000..0ca1b856 Binary files /dev/null and b/core/methods/__pycache__/multiproc.cpython-38.pyc differ diff --git a/core/methods/__pycache__/netinfo.cpython-37.pyc b/core/methods/__pycache__/netinfo.cpython-37.pyc new file mode 100644 index 00000000..de106a55 Binary files /dev/null and b/core/methods/__pycache__/netinfo.cpython-37.pyc differ diff --git a/core/methods/__pycache__/parser.cpython-37.pyc b/core/methods/__pycache__/parser.cpython-37.pyc new file mode 100644 index 00000000..fb008999 Binary files /dev/null and b/core/methods/__pycache__/parser.cpython-37.pyc differ diff --git a/core/methods/__pycache__/parser.cpython-38.pyc b/core/methods/__pycache__/parser.cpython-38.pyc new file mode 100644 index 00000000..6c2c0ef4 Binary files /dev/null and b/core/methods/__pycache__/parser.cpython-38.pyc differ diff --git 
a/core/methods/__pycache__/print.cpython-37.pyc b/core/methods/__pycache__/print.cpython-37.pyc new file mode 100644 index 00000000..e7374f88 Binary files /dev/null and b/core/methods/__pycache__/print.cpython-37.pyc differ diff --git a/core/methods/__pycache__/print.cpython-38.pyc b/core/methods/__pycache__/print.cpython-38.pyc new file mode 100644 index 00000000..f6cb382d Binary files /dev/null and b/core/methods/__pycache__/print.cpython-38.pyc differ diff --git a/core/methods/__pycache__/select.cpython-37.pyc b/core/methods/__pycache__/select.cpython-37.pyc new file mode 100644 index 00000000..1b4b73b8 Binary files /dev/null and b/core/methods/__pycache__/select.cpython-37.pyc differ diff --git a/core/methods/__pycache__/select.cpython-38.pyc b/core/methods/__pycache__/select.cpython-38.pyc new file mode 100644 index 00000000..fa0543ce Binary files /dev/null and b/core/methods/__pycache__/select.cpython-38.pyc differ diff --git a/core/methods/__pycache__/threat.cpython-37.pyc b/core/methods/__pycache__/threat.cpython-37.pyc new file mode 100644 index 00000000..4cb620d1 Binary files /dev/null and b/core/methods/__pycache__/threat.cpython-37.pyc differ diff --git a/core/methods/__pycache__/threat.cpython-38.pyc b/core/methods/__pycache__/threat.cpython-38.pyc new file mode 100644 index 00000000..f82e63bb Binary files /dev/null and b/core/methods/__pycache__/threat.cpython-38.pyc differ diff --git a/core/methods/__pycache__/tor.cpython-37.pyc b/core/methods/__pycache__/tor.cpython-37.pyc new file mode 100644 index 00000000..00c3c115 Binary files /dev/null and b/core/methods/__pycache__/tor.cpython-37.pyc differ diff --git a/core/methods/__pycache__/tor.cpython-38.pyc b/core/methods/__pycache__/tor.cpython-38.pyc new file mode 100644 index 00000000..30a1eb9e Binary files /dev/null and b/core/methods/__pycache__/tor.cpython-38.pyc differ diff --git a/core/methods/cache.py b/core/methods/cache.py new file mode 100644 index 00000000..edb76655 --- /dev/null +++ b/core/methods/cache.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .; + , ; + . + + . + .;. 
+ .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + + +import core.variables as vars +import importlib, socket +from core.methods.select import bareimport +from core.methods.threat import Target +from core.Core.colors import R, color + +def targetparse(targetinp): + user = "" + passwd = "" + ip = False + if "https://" in targetinp: + port = 443 + elif "http://" in targetinp: + port = 80 + else: + ip = True + if not ip: + target = targetinp.split("://")[1] + tchk = target + if "@" in target: + creds = target.split("@")[0] + user = creds.split(":")[0] + passwd = creds.split(":")[1] + rest = target.split("@")[1] + tchk = rest + if ":" in rest: + try: + port = int(rest.split(":")[1]) + tchk = rest.split(":")[0] + except Exception as e: + print(e) + else: + if ":" in target: + try: + port = int(target.split(":")[1]) + tchk = target.split(":")[0] + except Exception as e: + print(e) + + if str(tchk).endswith('/'): + tchk = tchk[:-1] + + try: + ip = socket.gethostbyname(tchk) + parsedTarget = Target(tchk, ip) + parsedTarget.fullurl = targetinp + parsedTarget.port = port + parsedTarget.urluser = user + parsedTarget.urlpasswd = passwd + return parsedTarget + except socket.gaierror: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Target seems down...") + pass + return None + else: + parsedTarget = Target(targetinp, targetinp) + parsedTarget.fullurl = targetinp + return parsedTarget + + +def targetname(fulltarget): + target = targetparse(fulltarget) + if target: + return target.name + else: + return "" + + +def load(i): + targets = [] + with open("core/sessioncache/{}".format(i),"r") as f: + targets = [line.rstrip("\n") for line in f] + for vic in targets: + vic2 = targetparse(vic) + if vic2: + vars.targets.append(vic2) + + +def save(i): + with open("core/sessioncache/{}".format(i),"w") as f: + for vic in vars.targets: + f.write(vic.fullurl) + f.write("\n") + +def sessionparse(i, load=True): + victims = [] + modules = {} + oneline = "" + with open("core/sessioncache/{}".format(i), "r") as file: + for line in file: + oneline += line + vicblocks = oneline.split("")[0] + victim = block.split(">")[0].strip() + victims.append(victim) + if load: + target = targetparse(victim) + if target: + vars.targets.append(target) + inter = block.replace(victim+">","") + modblocks = inter.split("")[0].strip() + modblock = modblock.split("")[0] + if ">" in modblock: + modblock = modblock.split(">")[1] + proplist = modblock.split(";") + for proptuple in proplist: + if ":" in proptuple: + prop = proptuple.split(":")[0].strip() + val = proptuple.split(":")[1].strip() + properties.update({prop : val}) + modules.update({module : properties}) + return (victims, modules) + +def createVal(victims, modules, name): + with open ("core/sessioncache/{}".format(name), "w") as file: + for victim in victims: + file.write("\n") + for module in modules: + file.write(" \n") + if "modules" in module: + j = importlib.import_module(module) + else: + p = bareimport(module) + md = p[1] + j = importlib.import_module(md) + properties = j.properties + for key, value in properties.items(): + if value[1].strip() != "": + file.write(" {}:{};\n".format(key, value[1])) + file.write(" \n") + file.write("\n") diff --git a/core/methods/create.py b/core/methods/create.py new file mode 100755 index 00000000..92b18864 --- /dev/null +++ b/core/methods/create.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .; + , ; + . + + . + .;. 
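# Editorial walk-through of the URL fields targetparse() above separates,
# using an assumed example target (no DNS lookup here, unlike the real code,
# which also resolves the host and builds a Target object):
url = 'http://admin:s3cret@example.com:8080/app'
scheme, rest = url.split('://', 1)
creds, hostpart = rest.split('@', 1)
user, passwd = creds.split(':', 1)
host, port = hostpart.split('/', 1)[0].split(':')
assert (scheme, user, passwd, host, int(port)) == \
    ('http', 'admin', 's3cret', 'example.com', 8080)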
+ .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + +import argparse +import os + +p = argparse.ArgumentParser() +p.add_argument('intype', + help='Installation Type', + metavar='ITP' + ) +p.add_argument('user', + help='Unprivileged user', + metavar='USR' + ) + +args = p.parse_args() +print(args.intype) +if args.intype == "OPT": + os.system("touch /opt/TIDoS/core/sessioncache/syn.val && chown {0}:{0} /opt/TIDoS/core/sessioncache/syn.val".format(args.user)) +else: + os.system("touch /home/{0}/TIDoS/core/sessioncache/syn.val && chown {0}:{0} /home/{0}/TIDoS/core/sessioncache/syn.val".format(args.user)) diff --git a/core/methods/creds.py b/core/methods/creds.py new file mode 100644 index 00000000..08becb76 --- /dev/null +++ b/core/methods/creds.py @@ -0,0 +1,92 @@ + +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .; + , ; + . + + . + .;. + .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + + +import core.variables as vars +from core.Core.colors import color, C, R, B + +def creds(inp): + if "add" in inp: + correct = True + user = input(" [§] username :> ") + passwd = input(" [§] password :> ") + url = inp.split("add")[1].strip() + if user != "" and passwd != "" and "@" not in url: + if "https" in url: + domain = url.split("://")[1] + url2 = "https://" + user + ":" + passwd + "@" + domain + elif "http" in url: + domain = url.split("://")[1] + url2 = "http://" + user + ":" + passwd + "@" + domain + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Provide target formatted as in viclist") + correct = False + if correct: + found = False + for i in range(0,len(vars.targets)): + if vars.targets[i].fullurl == url: + vars.targets[i].fullurl = url2 + vars.targets[i].urluser = user + vars.targets[i].urlpasswd = passwd + found = True + if found: + print(" [+] {} > {}".format(url,url2)) + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "An error occurred. Either, no credentials were provided or the URL already contains credentials.") + elif "del" in inp: + correct = True + url = inp.split("del")[1].strip() + if "https" in url: + domain = url.split("@")[1] + url2 = "https://" + domain + elif "http" in url: + domain = url.split("@")[1] + url2 = "http://" + domain + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Provide target formatted as in viclist") + correct = False + if correct: + found = False + for i in range(0,len(vars.targets)): + if vars.targets[i].fullurl == url: + vars.targets[i].fullurl = url2 + vars.targets[i].urluser = "" + vars.targets[i].urlpasswd = "" + found = True + if found: + print(" [+] {} > {}".format(url,url2)) + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Syntax: creds add|del target") + +def attackdrop(target): + if "@" in target.fullurl: + newtarget = target + newtarget.fullurl = newtarget.name + return newtarget + #ssl = False + #if "https" in target: + # ssl = True + #splitar = target.split("@")[1] + #if ssl: + # return "https://" + splitar + #else: + # return "http://" + splitar + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "No credentials found.") diff --git a/core/methods/fetch.py b/core/methods/fetch.py new file mode 100644 index 00000000..f61c9914 --- /dev/null +++ b/core/methods/fetch.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .;. + , ;. + . :, + ;'. + .. + .;. 
+ .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + +import core.variables as varis +from core.methods.tor import session + + +def fetchinit(a): + try: + localver = varis.e_version.split("#")[0] + s = session() + onver = s.get("https://raw.githubusercontent.com/VainlyStrain/TIDoS-Framework/dev/core/doc/version", timeout=5).text.strip() + localmain = localver.split("-")[0] + localrev = localver.split("-")[1] + locallist = localmain.split(".") + onmain = onver.split("-")[0] + onrev = onver.split("-")[1] + onlist = onmain.split(".") + uptodate = True + for i in range(0, len(locallist)): + if int(locallist[i]) < int(onlist[i]): + uptodate = False + if uptodate: + if int(localrev) < int(onrev): + uptodate = False + if not uptodate: + varis.upd = True + except: + pass diff --git a/core/methods/inputin.py b/core/methods/inputin.py new file mode 100644 index 00000000..facc7c60 --- /dev/null +++ b/core/methods/inputin.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# -:-:-:-:-:-:-:-:-:-:-:-:# +# TIDoS Framework # +# -:-:-:-:-:-:-:-:-:-:-:-:# + +# This module requires TIDoS Framework +# https://github.com/0xInfection/TIDoS-Framework + + + +import os +import socket +import time +import string + +import core.variables as vars +from core.Core.colors import * +from core.methods.threat import Target + + +def inputin(target): + valid_ip_regex = r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$' + valid_host_regex = r'^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$' + + try: + web = target + if not str(web).startswith('http'): + mo = input(GR + ' [?] Does this website use SSL? (y/n) :> ') + if mo == 'y' or mo == 'Y': + web = 'https://' + web + elif mo == 'n': + web = 'http://' + web + + if 'http://' in web: + po = web.split('://')[1] + port = 80 + elif 'https://' in web: + po = web.split('://')[1] + port = 443 + else: + po = '' + port = 1337 + #if str(web).endswith('/'): + wspl = web.split("://") + if "/" in wspl[1]: + wspl[1] = wspl[1].split("/")[0] + web = wspl[0] + "://" + wspl[1] + if po != "": + po = wspl[1] + custport = input(" [?] Does the site use a custom port? (enter if not) :> ") + if custport != "": + inport = input(" [§] Enter port :> ") + try: + port = int(inport) + assert port in range(1, 65535) + except: + print(R+" [!] Not a valid port value"+C) + print(GR + ' [*] Checking server status...') + time.sleep(0.6) + + try: + ip = socket.gethostbyname(po) + print(G + ' [+] Site seems to be up...'+C+color.TR2+C) + time.sleep(0.5) + print(O + ' [+] IP Detected :' + C+color.TR3+C+G + ip+C+color.TR2+C) + time.sleep(0.5) + print('') + os.system('cd tmp/logs/ && rm -rf ' + po + '-logs && mkdir ' + po + '-logs/') + user = input(" [?] Enter username (leave blank if none): ") + passwd = input(" [?] 
Enter password (leave blank if none): ") + webfin = web + if user != "" and passwd != "": + wl = web.split("://") + webfin = wl[0] + "://" + user + ":" + passwd + "@" + wl[1] + if port not in [80, 443]: + webfin = webfin + ":" + str(port) + #vars.targets.append(webfin) + newTarget = Target(po, ip) + newTarget.port = port + newTarget.urluser = user + newTarget.urlpasswd = passwd + newTarget.fullurl = webfin + vars.targets.append(newTarget) + print(O+" [+] Target added:"+C+color.TR3+C+G+webfin+C+color.TR2+C) + + except socket.gaierror: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Target seems down...") + pass + + except KeyboardInterrupt: + pass + +def inputip(target, net=False): + v4 = target.split(".") + v6 = target.split(":") + try: + if len(v4) == 4 and (i.isdigit() for i in v4): + if (int(i) in range(0,256) for i in v4): + if not net: + print(" [+] IPv4 detected!") + elif len(v6) == 8 and (len(i) in range(0,5) for i in v6): + if (int(i) for i in v6): + if not net: + print(" [+] IPv6 detected!") + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Invalid IP: {}".format(target)) + except ValueError: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Invalid IP: {}".format(target)) + pass + + newTarget = Target(target, target) + newTarget.fullurl = target + + if net: + vars.targets.append(newTarget) + print(O+" [+] Target added:"+C+color.TR3+C+G+target+C+color.TR2+C) + elif os.system("ping -c 1 -q -W 5 " + target + " > /dev/null") == 0: + vars.targets.append(newTarget) + print(O+" [+] Target added:"+C+color.TR3+C+G+target+C+color.TR2+C) + else: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Target seems down...") + +def inputnet(target): + net = target.split("/") + if len(net) != 2: + print(R + " [-] " + "\033[0m" + color.UNDERLINE + "\033[1m" + "Syntax: NRange/NMask") + else: + range = net[0] + mask = net[1] + try: + import modules.ScanningEnumeration.arpscan as sca + sca.properties["IP"][1] = range + sca.properties["NMASK"][1] = mask + targets = sca.attack("") + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(('google.com', 0)) + attackerip = s.getsockname()[0].strip() + for targetip in targets: + if targetip != attackerip: + inputip(targetip, net=True) + #print("'{}','{}'".format(targetip, attackerip)) + except Exception as e: + print(e) diff --git a/core/methods/loot.py b/core/methods/loot.py new file mode 100644 index 00000000..41a027a1 --- /dev/null +++ b/core/methods/loot.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .;. + , ;. + . :, + ;'. + .. + .;. + .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + +# TODO: create code for loot.py diff --git a/core/methods/multiproc.py b/core/methods/multiproc.py new file mode 100644 index 00000000..a22c03e2 --- /dev/null +++ b/core/methods/multiproc.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .; + , ; + . + + . + .;. 
+ .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + + +def listsplit(l, n): + """Yield successive n-sized chunks from l.""" + if n == 0: + n += 1 + for i in range(0, len(l), n): + yield l[i:i + n] + +def file2list(path): + lines = [] + with open(path, "r") as f: + for line in f: + lines.append(line.strip("\n")) + return lines diff --git a/core/methods/netinfo.py b/core/methods/netinfo.py new file mode 100644 index 00000000..8b45eda9 --- /dev/null +++ b/core/methods/netinfo.py @@ -0,0 +1,52 @@ + + +import os +import platform +import socket +import time + +from core.Core.colors import * +from core.methods.tor import session +from core.variables import interface + + +request = session() +mac_address = os.popen("cat /sys/class/net/{}/address".format(interface)).read().strip() +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +s.connect(('google.com', 0)) +localaddr = s.getsockname()[0].strip() # local subnet +ipaddr = request.get('http://ip.42.pl/raw').text.strip() +def_gw_device = os.popen("route | grep '^default' | grep -o '[^ ]*$'").read().strip() + + +def info(): + print() + #print("\n" + O + " +======================================================+" + color.END) + #print("" + GR + " +------------------------------------+") + time.sleep(0.1) + print(" |: " + O + "Mac Address:" + C + color.TR3 +C + G + mac_address + C + color.TR2 + C) + #print(" |: " + O + "Mac Address:" + C + color.TR3 +C + G + mac_address + C + color.TR2 + C) + # time.sleep (0.1) + #print("" + GR + " +------------------------------------+") + time.sleep(0.1) + print(" |: " + O + "Local address:" + C + color.TR3 +C + G + localaddr + C + color.TR2 + C) + # time.sleep (0.1) + #print("" + GR + " +------------------------------------+") + time.sleep(0.1) + print(" |: " + O + "IP:" + C + color.TR3 +C + G + ipaddr + C + color.TR2 + C) + # time.sleep (0.1) + #print("" + GR + " +------------------------------------+") + time.sleep(0.1) + print(" |: " + O + "Operating System:" + C + color.TR3 +C + G + platform.system() + C + color.TR2 + C) + # time.sleep (0.1) + #print("" + GR + " +------------------------------------+") + time.sleep(0.1) + print(" |: " + O + "Name:" + C + color.TR3 +C + G + platform.node() + C + color.TR2 + C) + # time.sleep (0.1) + #print("" + GR + " +------------------------------------+") + time.sleep(0.1) + print(" |: " + O + "Interface:" + C + color.TR3 +C + G + def_gw_device + C + color.TR2 + C) + # time.sleep (0.1) + #print("" + GR + " +------------------------------------+" + color.END) + #print("" + O + " +=======================================================+\n") + print() \ No newline at end of file diff --git a/core/methods/parser.py b/core/methods/parser.py new file mode 100644 index 00000000..bd837174 --- /dev/null +++ b/core/methods/parser.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .; + , ; + . + + . + .;. 
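# Quick usage sketch for listsplit() above: it yields n-sized chunks, with a
# shorter tail when len(l) is not a multiple of n.
from core.methods.multiproc import listsplit

work = list(range(10))
chunks = list(listsplit(work, 4))
assert chunks == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]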
+ .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + +import argparse +import sys +from core.methods.print import banner +from core.Core.colors import color, RB, C, RC, R, RD + +class VaileParser(argparse.ArgumentParser): + def error(self, message): + banner() + self.print_usage(sys.stderr) + self.exit(2, '{}[-]\033[0m\033[1m Invalid/missing params\033[0m\n{}[HINT]\033[0m\033[0m {}\n'.format(R, R, message)) + def print_help(self): + print('''{}Vsynta.:{} {}tidos{} [-v VIC] [-p] [-a CA] [-s] + [-f] [-l M] [-h] [-c VAL] + [--app] [-q] + + -v VIC, --victim VIC {}Target to attack per cli{} + -l M, --load M {}Module to load per cli{} + -a CA, --list CA {}List modules of CA{} + -p, --tor {}Pipe Attacks thro. Tor?{} + -s, --session {}Is VIC a session file?{} + -q, --quiet {}Start Console quietly{} + -f, --fetch {}Check for & install updates{} + -c, --file {}Automation using VAL file{} + --app {}Run TIDoS graphical interface{}'''.format(RC, color.END, RB, color.END, RC, color.END, RC, color.END, RC, color.END, RC, color.END, RC, color.END, RC, color.END, RC, color.END, RC, color.END, RC, color.END)) + +class VaileFormatter(argparse.RawDescriptionHelpFormatter): + def add_usage(self, usage, actions, groups, prefix=None): + if prefix is None: + prefix = RD + 'Vsynta ' + color.END + return super(VaileFormatter, self).add_usage("{}tidos{} [-v VIC] [-p] [-a CA] [-s]\n [-f] [-l M] [-h] [-c VAL]\n [--app] [-q]".format(RB,color.END), actions, groups, prefix) + +def build_parser(): + p = VaileParser(formatter_class=VaileFormatter, add_help=False) + p.add_argument('-v', '--victim', + help='Target to attack (without loading entire framework)', + metavar='VIC' + ) + p.add_argument('-l', '--load', + help='Module to use (without loading entire framework)', + metavar='M' + ) + p.add_argument('-h', '--help', + help="Display this help message and exit", + action="https://github.com/store_true" + ) + p.add_argument('-s', '--session', + help="Is --victim a session file?", + action="https://github.com/store_true" + ) + p.add_argument('-p', '--tor', + help="Pipe Attacks through Tor?", + action="https://github.com/store_true" + ) + p.add_argument('-a', '--list', + help='List modules of category CA', + metavar='CA' + ) + p.add_argument('-q', '--quiet', + help='Start Console quietly', + action='https://github.com/store_true' + ) + p.add_argument('-f', '--fetch', + help='Check for and install updates.', + action='https://github.com/store_true' + ) + p.add_argument('-c', '--file', + help='Automation using VAL file', + metavar='VAL' + ) + #p.epilog = "Beware, my friend. These are dark times." + return p diff --git a/core/methods/print.py b/core/methods/print.py new file mode 100644 index 00000000..3b5ee121 --- /dev/null +++ b/core/methods/print.py @@ -0,0 +1,669 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +_____, ___ + '+ .;. + , ;. + . :, + ;'. + .. + .;. + .; + : + , + + +┌─[TIDoS]─[] +└──╼ VainlyStrain +""" + +import os, re +import time +import random +from time import sleep +from datetime import datetime +from random import uniform as rflt +import threading + +from core import variables as vars +#from core.methods.select import modulecount +from core.methods.fetch import fetchinit +from core.Core.colors import * + + +def loadstyle(): + success = False + thread = threading.Thread(target=fetchinit, args=(1,)) + thread.start() + os.system(vars.CMD_CLEAR) + red_bold = R + cursive = color.END + "\033[3m" + reset = cursive + loading = "Loading console.." 
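# Usage sketch for build_parser() above, with a hypothetical command line
# (assumes the package root is on sys.path so core.methods.parser imports):
from core.methods.parser import build_parser

args = build_parser().parse_args(['-v', 'http://example.com/', '-q'])
assert args.victim == 'http://example.com/'
assert args.quiet is True and args.tor is False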
+ swappy = "Loading console.." + display = """ + + + + + +____, __{} + + ; + .:, + ' + . {}<>{} {}T I D : 2{} {}<>{} + + ; {}{}{} + ;. + ; + ; + ' + """.format(color.END, R, color.END, RB, color.END, R, C, cursive, swappy, color.END) + + action = 0 + #while action < 2: + while thread.is_alive(): + for i, char in enumerate(loading): + if i == 0: + swappy = "%s%s%s%s" % (red_bold, char.swapcase(), reset, loading[1:]) + #print("%s%s%s%s" % (red_bold, char.swapcase(), reset, loading[1:])) + elif i == 1: + old_loading = loading[0].swapcase() + swappy = "%s%s%s%s%s" % (old_loading, red_bold, char.swapcase(), reset, loading[2:]) + #print("%s%s%s%s%s" % (old_loading, red_bold, char.swapcase(), reset, loading[2:])) + elif i == i: + old_loading = loading[-0:i] + swappy = "%s%s%s%s%s" % (old_loading, red_bold, char.swapcase(), reset, loading[i + 1:]) + #print("%s%s%s%s%s" % (old_loading, red_bold, char.swapcase(), reset, loading[i + 1:])) + display = """ + + + + + +____, __{} + + ; + .:, + ' + . {}<>{} {}T I D : 2{} {}<>{} + + ; {}{}{} + ;. + ; + ; + ' + """.format(color.END, R, color.END, RB, color.END, R, C, cursive, swappy, color.END) + print(display) + time.sleep(0.1) + os.system(vars.CMD_CLEAR) + action += 1 + + + +vaile = '''{0} | + : + | + . + . + . +____, __ .| + + ; .| + .{1}:, + ' + . / + + ; :, + ;. /, + {0} ; /;' ; + ; /;{2}|{0} : ^ + ' / {2}:{0} ;.' * + '/; \\ + ./ '. \\ {2}|{0} + '. '- __\\,_ + {1} '. {0}\\{1}`{2};{0}{1} + \\ {0}\\ {1} + .\\. {0}V{1} + \\. + .,. + .'. + ''.;: + .|. + | . + . + +'''.format(color.END, color.BOLD, color.CURSIVE) + +metasploit_hakcers = ''' ,˛ + .:oDFo:. + ./ymM0dayMmy/. + -+dHJ5aGFyZGVyIQ==+- + .:sm⏣~~Destroy.No.Data~~s:` + -+h2~~Maintain.No.Persistence~~h+- + .:odNo2~~Above.All.Else.Do.No.Harm~~Ndo:` + ./etc/shadow.0days-Data'%20OR%201=1--.No.0MN8'/. + -++SecKCoin++e.AMd` `.-://///+hbove.913.ElsMNh+- + -~/.ssh/id_rsa.Des- `htN01UserWroteMe!- + :dopeAW.Noo :is:TЯiKC.sudo-.A: + :we're.all.alike'` The.PFYroy.No.D7: + :PLACEDRINKHERE!: yxp_cmdshell.Ab0: + :msf>exploit -j. :Ns.BOB&ALICEes7: + :---srwxrwx:-.` `MS146.52.No.Per: + : - -Hello, world! - - -''' - soup = self.soup(html) - self.assertEqual("text/javascript", soup.find('script')['type']) - - def test_comment(self): - # Comments are represented as Comment objects. - markup = "
<p>foo<!--foobar-->baz</p>
" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEqual(comment.__class__, Comment) - - # The comment is properly integrated into the tree. - foo = soup.find(text="foo") - self.assertEqual(comment, foo.next_element) - baz = soup.find(text="baz") - self.assertEqual(comment, baz.previous_element) - - def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved in
 and "
-        self.assertSoupEquals(pre_markup)
-        self.assertSoupEquals(textarea_markup)
-
-        soup = self.soup(pre_markup)
-        self.assertEqual(soup.pre.prettify(), pre_markup)
-
-        soup = self.soup(textarea_markup)
-        self.assertEqual(soup.textarea.prettify(), textarea_markup)
-
-        soup = self.soup("<textarea></textarea>")
-        self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
-
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "Inside a B tag"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "
<p>A <i>nested <b>tag</b></i></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-        double_nested_b_tag = "
<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-    def test_nested_block_level_elements(self):
-        """Block elements can be nested."""
-        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>
')
-        blockquote = soup.blockquote
-        self.assertEqual(blockquote.p.b.string, 'Foo')
-        self.assertEqual(blockquote.b.string, 'Foo')
-
-    def test_correctly_nested_tables(self):
-        """One table can go inside another one."""
-        markup = ('<table id="1">'
-                  '<tr>'
-                  "<td>Here's another table:"
-                  '<table id="2">'
-                  '<tr><td>foo</td></tr>'
-                  '</table></td>')
-
-        self.assertSoupEquals(
-            markup,
-            '<table id="1"><tr><td>Here\'s another table:'
-            '<table id="2"><tr><td>foo</td></tr></table>'
-            '</td></tr></table>')
-
-        self.assertSoupEquals(
-            "<table><thead><tr><td>Foo</td></tr></thead>"
-            "<tbody><tr><td>Bar</td></tr></tbody>"
-            "<tfoot><tr><td>Baz</td></tr></tfoot></table>
") - - def test_deeply_nested_multivalued_attribute(self): - # html5lib can set the attributes of the same tag many times - # as it rearranges the tree. This has caused problems with - # multivalued attributes. - markup = '
' - soup = self.soup(markup) - self.assertEqual(["css"], soup.div.div['class']) - - def test_multivalued_attribute_on_html(self): - # html5lib uses a different API to set the attributes ot the - # tag. This has caused problems with multivalued - # attributes. - markup = '' - soup = self.soup(markup) - self.assertEqual(["a", "b"], soup.html['class']) - - def test_angle_brackets_in_attribute_values_are_escaped(self): - self.assertSoupEquals('', '') - - def test_entities_in_attributes_converted_to_unicode(self): - expect = u'
<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
-        self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
-        self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
-        self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
-        self.assertSoupEquals('<p id="pi&ntilde;ata"></p>
', expect) - - def test_entities_in_text_converted_to_unicode(self): - expect = u'
<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>
' - self.assertSoupEquals("
<p>pi&#241;ata</p>
", expect) - self.assertSoupEquals("
<p>pi&#xf1;ata</p>
", expect) - self.assertSoupEquals("
<p>pi&#Xf1;ata</p>
", expect) - self.assertSoupEquals("
<p>pi&ntilde;ata</p>
", expect) - - def test_quot_entity_converted_to_quotation_mark(self): - self.assertSoupEquals("
<p>I said &quot;good day!&quot;</p>
", - '
<p>I said "good day!"</p>
')
-
-    def test_out_of_range_entity(self):
-        expect = u"\N{REPLACEMENT CHARACTER}"
-        self.assertSoupEquals("&#10000000000000;", expect)
-        self.assertSoupEquals("&#x10000000000000;", expect)
-        self.assertSoupEquals("&#1000000000;", expect)
-
-    def test_multipart_strings(self):
-        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
-        soup = self.soup("
<html><h2>\nfoo</h2><p></p></html>
") - self.assertEqual("p", soup.h2.string.next_element.name) - self.assertEqual("p", soup.p.name) - self.assertConnectedness(soup) - - def test_head_tag_between_head_and_body(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - foo - -""" - soup = self.soup(content) - self.assertNotEqual(None, soup.html.body) - self.assertConnectedness(soup) - - def test_multiple_copies_of_a_tag(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - - - - -""" - soup = self.soup(content) - self.assertConnectedness(soup.article) - - def test_basic_namespaces(self): - """Parsers don't need to *understand* namespaces, but at the - very least they should not choke on namespaces or lose - data.""" - - markup = b'4' - soup = self.soup(markup) - self.assertEqual(markup, soup.encode()) - html = soup.html - self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) - self.assertEqual( - 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) - self.assertEqual( - 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) - - def test_multivalued_attribute_value_becomes_list(self): - markup = b'' - soup = self.soup(markup) - self.assertEqual(['foo', 'bar'], soup.a['class']) - - # - # Generally speaking, tests below this point are more tests of - # Beautiful Soup than tests of the tree builders. But parsers are - # weird, so we run these tests separately for every tree builder - # to detect any differences between them. - # - - def test_can_parse_unicode_document(self): - # A seemingly innocuous document... but it's in Unicode! And - # it contains characters that can't be represented in the - # encoding found in the declaration! The horror! - markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) - self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) - - def test_soupstrainer(self): - """Parsers should be able to work with SoupStrainers.""" - strainer = SoupStrainer("b") - soup = self.soup("A bold statement", - parse_only=strainer) - self.assertEqual(soup.decode(), "bold") - - def test_single_quote_attribute_values_become_double_quotes(self): - self.assertSoupEquals("", - '') - - def test_attribute_values_with_nested_quotes_are_left_alone(self): - text = """a""" - self.assertSoupEquals(text) - - def test_attribute_values_with_double_nested_quotes_get_quoted(self): - text = """a""" - soup = self.soup(text) - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - self.assertSoupEquals( - soup.foo.decode(), - """a""") - - def test_ampersand_in_attribute_value_gets_escaped(self): - self.assertSoupEquals('', - '') - - self.assertSoupEquals( - 'foo', - 'foo') - - def test_escaped_ampersand_in_attribute_value_is_left_alone(self): - self.assertSoupEquals('') - - def test_entities_in_strings_converted_during_parsing(self): - # Both XML and HTML entities are converted to Unicode characters - # during parsing. - text = "
<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>
" - expected = u"
<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>
" - self.assertSoupEquals(text, expected) - - def test_smart_quotes_converted_on_the_way_in(self): - # Microsoft smart quotes are converted to Unicode characters during - # parsing. - quote = b"
<p>\x91Foo\x92</p>
" - soup = self.soup(quote) - self.assertEqual( - soup.p.string, - u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") - - def test_non_breaking_spaces_converted_on_the_way_in(self): - soup = self.soup("  ") - self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) - - def test_entities_converted_on_the_way_out(self): - text = "
<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>
" - expected = u"
<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>
".encode("utf-8") - soup = self.soup(text) - self.assertEqual(soup.p.encode("utf-8"), expected) - - def test_real_iso_latin_document(self): - # Smoke test of interrelated functionality, using an - # easy-to-understand document. - - # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. - unicode_html = u'
<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>
' - - # That's because we're going to encode it into ISO-Latin-1, and use - # that to test. - iso_latin_html = unicode_html.encode("iso-8859-1") - - # Parse the ISO-Latin-1 HTML. - soup = self.soup(iso_latin_html) - # Encode it to UTF-8. - result = soup.encode("utf-8") - - # What do we expect the result to look like? Well, it would - # look like unicode_html, except that the META tag would say - # UTF-8 instead of ISO-Latin-1. - expected = unicode_html.replace("ISO-Latin-1", "utf-8") - - # And, of course, it would be in UTF-8, not Unicode. - expected = expected.encode("utf-8") - - # Ta-da! - self.assertEqual(result, expected) - - def test_real_shift_jis_document(self): - # Smoke test to make sure the parser can handle a document in - # Shift-JIS encoding, without choking. - shift_jis_html = ( - b'
<html><head></head><body><pre>'
-            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
-            b'</pre></body></html>
') - unicode_html = shift_jis_html.decode("shift-jis") - soup = self.soup(unicode_html) - - # Make sure the parse tree is correctly encoded to various - # encodings. - self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) - self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) - - def test_real_hebrew_document(self): - # A real-world test to make sure we can convert ISO-8859-8 (a - # Hebrew encoding) to UTF-8. - hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality
</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>
\xed\xe5\xec\xf9</body></html>' - soup = self.soup( - hebrew_document, from_encoding="iso8859-8") - # Some tree builders call it iso8859-8, others call it iso-8859-8. - # That's not a difference we really care about. - assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') - self.assertEqual( - soup.encode('utf-8'), - hebrew_document.decode("iso8859-8").encode("utf-8")) - - def test_meta_tag_reflects_current_encoding(self): - # Here's the <meta> tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('<meta content="text/html; charset=x-sjis" ' - 'http-equiv="Content-type"/>') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '<html><head>\n%s\n' - '<meta http-equiv="Content-language" content="ja"/>' - '</head><body>Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) - content = parsed_meta['content'] - self.assertEqual('text/html; charset=x-sjis', content) - - # But that value is actually a ContentMetaAttributeValue object. - self.assertTrue(isinstance(content, ContentMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('text/html; charset=utf8', content.encode("utf8")) - - # For the rest of the story, see TestSubstitutions in - # test_tree.py. - - def test_html5_style_meta_tag_reflects_current_encoding(self): - # Here's the <meta> tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('<meta id="encoding" charset="x-sjis" />') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '<html><head>\n%s\n' - '<meta http-equiv="Content-language" content="ja"/>' - '</head><body>Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', id="encoding") - charset = parsed_meta['charset'] - self.assertEqual('x-sjis', charset) - - # But that value is actually a CharsetMetaAttributeValue object. - self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('utf8', charset.encode("utf8")) - - def test_tag_with_no_attributes_can_have_attributes_added(self): - data = self.soup("<a>text</a>") - data.a['foo'] = 'bar' - self.assertEqual('<a foo="bar">text</a>', data.a.decode()) - -class XMLTreeBuilderSmokeTest(object): - - def test_pickle_and_unpickle_identity(self): - # Pickling a tree, then unpickling it, yields a tree identical - # to the original. - tree = self.soup("<a><b>foo</b></a>") - dumped = pickle.dumps(tree, 2) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.__class__, BeautifulSoup) - self.assertEqual(loaded.decode(), tree.decode()) - - def test_docstring_generated(self): - soup = self.soup("<root/>") - self.assertEqual( - soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>') - - def test_xml_declaration(self): - markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>""" - soup = self.soup(markup) - self.assertEqual(markup, soup.encode("utf8")) - - def test_processing_instruction(self): - markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>""" - soup = self.soup(markup) - self.assertEqual(markup, soup.encode("utf8")) - - def test_real_xhtml_document(self): - """A real XHTML document should come out *exactly* the same as it went in.""" - markup = b"""<?xml version="1.0" encoding="utf-8"?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"> -<html xmlns="http://www.w3.org/1999/xhtml"> -<head><title>Hello.</title></head> -<body>Goodbye.</body> -</html>""" - soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8"), markup) - - def test_formatter_processes_script_tag_for_xml_documents(self): - doc = """ - <script type="text/javascript"> - </script> -""" - soup = BeautifulSoup(doc, "lxml-xml") - # lxml would have stripped this while parsing, but we can add - # it later.
- soup.script.string = 'console.log("< < hey > > ");' - encoded = soup.encode() - self.assertTrue(b"< < hey > >" in encoded) - - def test_can_parse_unicode_document(self): - markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) - self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) - - def test_popping_namespaced_tag(self): - markup = 'b2012-07-02T20:33:42Zcd' - soup = self.soup(markup) - self.assertEqual( - unicode(soup.rss), markup) - - def test_docstring_includes_correct_encoding(self): - soup = self.soup("") - self.assertEqual( - soup.encode("latin1"), - b'\n') - - def test_large_xml_document(self): - """A large XML document should come out the same as it went in.""" - markup = (b'\n' - + b'0' * (2**12) - + b'') - soup = self.soup(markup) - self.assertEqual(soup.encode("utf-8"), markup) - - - def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): - self.assertSoupEquals("
<p>", "<p/>") - self.assertSoupEquals("<p>foo</p>
") - - def test_namespaces_are_preserved(self): - markup = 'This tag is in the a namespaceThis tag is in the b namespace' - soup = self.soup(markup) - root = soup.root - self.assertEqual("http://example.com/", root['xmlns:a']) - self.assertEqual("http://example.net/", root['xmlns:b']) - - def test_closing_namespaced_tag(self): - markup = '
<p xmlns:dc="http://purl.org/dc/elements/1.1/">20010504</p>
' - soup = self.soup(markup) - self.assertEqual(unicode(soup.p), markup) - - def test_namespaced_attributes(self): - markup = '' - soup = self.soup(markup) - self.assertEqual(unicode(soup.foo), markup) - - def test_namespaced_attributes_xml_namespace(self): - markup = 'bar' - soup = self.soup(markup) - self.assertEqual(unicode(soup.foo), markup) - -class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): - """Smoke test for a tree builder that supports HTML5.""" - - def test_real_xhtml_document(self): - # Since XHTML is not HTML5, HTML5 parsers are not tested to handle - # XHTML documents in any particular way. - pass - - def test_html_tags_have_namespace(self): - markup = "" - soup = self.soup(markup) - self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) - - def test_svg_tags_have_namespace(self): - markup = '' - soup = self.soup(markup) - namespace = "http://www.w3.org/2000/svg" - self.assertEqual(namespace, soup.svg.namespace) - self.assertEqual(namespace, soup.circle.namespace) - - - def test_mathml_tags_have_namespace(self): - markup = '5' - soup = self.soup(markup) - namespace = 'http://www.w3.org/1998/Math/MathML' - self.assertEqual(namespace, soup.math.namespace) - self.assertEqual(namespace, soup.msqrt.namespace) - - def test_xml_declaration_becomes_comment(self): - markup = '' - soup = self.soup(markup) - self.assertTrue(isinstance(soup.contents[0], Comment)) - self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') - self.assertEqual("html", soup.contents[0].next_element.name) - -def skipIf(condition, reason): - def nothing(test, *args, **kwargs): - return None - - def decorator(test_item): - if condition: - return nothing - else: - return test_item - - return decorator diff --git a/lib/bs4/bs4/tests/__init__.py b/lib/bs4/bs4/tests/__init__.py deleted file mode 100644 index 142c8cc3..00000000 --- a/lib/bs4/bs4/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"The beautifulsoup tests." 
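For reference, the html5lib namespace guarantee exercised by the smoke tests above can be reproduced on its own; a minimal sketch, assuming html5lib is installed:

    from bs4 import BeautifulSoup

    # html5lib assigns every ordinary HTML tag to the XHTML namespace.
    soup = BeautifulSoup("<a>content</a>", "html5lib")
    print(soup.a.namespace)  # http://www.w3.org/1999/xhtml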
diff --git a/lib/bs4/bs4/tests/test_builder_registry.py b/lib/bs4/bs4/tests/test_builder_registry.py deleted file mode 100644 index 90cad829..00000000 --- a/lib/bs4/bs4/tests/test_builder_registry.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Tests of the builder registry.""" - -import unittest -import warnings - -from bs4 import BeautifulSoup -from bs4.builder import ( - builder_registry as registry, - HTMLParserTreeBuilder, - TreeBuilderRegistry, -) - -try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True -except ImportError: - HTML5LIB_PRESENT = False - -try: - from bs4.builder import ( - LXMLTreeBuilderForXML, - LXMLTreeBuilder, - ) - LXML_PRESENT = True -except ImportError: - LXML_PRESENT = False - - -class BuiltInRegistryTest(unittest.TestCase): - """Test the built-in registry with the default builders registered.""" - - def test_combination(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('fast', 'html'), - LXMLTreeBuilder) - - if LXML_PRESENT: - self.assertEqual(registry.lookup('permissive', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('strict', 'html'), - HTMLParserTreeBuilder) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib', 'html'), - HTML5TreeBuilder) - - def test_lookup_by_markup_type(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) - self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) - else: - self.assertEqual(registry.lookup('xml'), None) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) - else: - self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) - - def test_named_library(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('lxml', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('lxml', 'html'), - LXMLTreeBuilder) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib'), - HTML5TreeBuilder) - - self.assertEqual(registry.lookup('html.parser'), - HTMLParserTreeBuilder) - - def test_beautifulsoup_constructor_does_lookup(self): - - with warnings.catch_warnings(record=True) as w: - # This will create a warning about not explicitly - # specifying a parser, but we'll ignore it. - - # You can pass in a string. - BeautifulSoup("", features="html") - # Or a list of strings. - BeautifulSoup("", features=["html", "fast"]) - - # You'll get an exception if BS can't find an appropriate - # builder. - self.assertRaises(ValueError, BeautifulSoup, - "", features="no-such-feature") - -class RegistryTest(unittest.TestCase): - """Test the TreeBuilderRegistry class in general.""" - - def setUp(self): - self.registry = TreeBuilderRegistry() - - def builder_for_features(self, *feature_list): - cls = type('Builder_' + '_'.join(feature_list), - (object,), {'features' : feature_list}) - - self.registry.register(cls) - return cls - - def test_register_with_no_features(self): - builder = self.builder_for_features() - - # Since the builder advertises no features, you can't find it - # by looking up features. - self.assertEqual(self.registry.lookup('foo'), None) - - # But you can find it by doing a lookup with no features, if - # this happens to be the only registered builder. 
- self.assertEqual(self.registry.lookup(), builder) - - def test_register_with_features_makes_lookup_succeed(self): - builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('foo'), builder) - self.assertEqual(self.registry.lookup('bar'), builder) - - def test_lookup_fails_when_no_builder_implements_feature(self): - builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('baz'), None) - - def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): - builder1 = self.builder_for_features('foo') - builder2 = self.builder_for_features('bar') - self.assertEqual(self.registry.lookup(), builder2) - - def test_lookup_fails_when_no_tree_builders_registered(self): - self.assertEqual(self.registry.lookup(), None) - - def test_lookup_gets_most_recent_builder_supporting_all_features(self): - has_one = self.builder_for_features('foo') - has_the_other = self.builder_for_features('bar') - has_both_early = self.builder_for_features('foo', 'bar', 'baz') - has_both_late = self.builder_for_features('foo', 'bar', 'quux') - lacks_one = self.builder_for_features('bar') - has_the_other = self.builder_for_features('foo') - - # There are two builders featuring 'foo' and 'bar', but - # the one that also features 'quux' was registered later. - self.assertEqual(self.registry.lookup('foo', 'bar'), - has_both_late) - - # There is only one builder featuring 'foo', 'bar', and 'baz'. - self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), - has_both_early) - - def test_lookup_fails_when_cannot_reconcile_requested_features(self): - builder1 = self.builder_for_features('foo', 'bar') - builder2 = self.builder_for_features('foo', 'baz') - self.assertEqual(self.registry.lookup('bar', 'baz'), None) diff --git a/lib/bs4/bs4/tests/test_docs.py b/lib/bs4/bs4/tests/test_docs.py deleted file mode 100644 index 5b9f6770..00000000 --- a/lib/bs4/bs4/tests/test_docs.py +++ /dev/null @@ -1,36 +0,0 @@ -"Test harness for doctests." 
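The register/lookup contract the registry tests above pin down, as a minimal standalone sketch (FakeBuilder is a hypothetical stand-in, not part of bs4):

    from bs4.builder import TreeBuilderRegistry

    registry = TreeBuilderRegistry()

    class FakeBuilder(object):
        features = ['fake', 'fast']

    registry.register(FakeBuilder)
    # A builder is found by any feature it advertises; a lookup with no
    # features returns the most recently registered builder.
    assert registry.lookup('fake') is FakeBuilder
    assert registry.lookup() is FakeBuilder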
- -# pylint: disable-msg=E0611,W0142 - -__metaclass__ = type -__all__ = [ - 'additional_tests', - ] - -import atexit -import doctest -import os -#from pkg_resources import ( -# resource_filename, resource_exists, resource_listdir, cleanup_resources) -import unittest - -DOCTEST_FLAGS = ( - doctest.ELLIPSIS | - doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - - -# def additional_tests(): -# "Run the doc tests (README.txt and docs/*, if any exist)" -# doctest_files = [ -# os.path.abspath(resource_filename('bs4', 'README.txt'))] -# if resource_exists('bs4', 'docs'): -# for name in resource_listdir('bs4', 'docs'): -# if name.endswith('.txt'): -# doctest_files.append( -# os.path.abspath( -# resource_filename('bs4', 'docs/%s' % name))) -# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) -# atexit.register(cleanup_resources) -# return unittest.TestSuite(( -# doctest.DocFileSuite(*doctest_files, **kwargs))) diff --git a/lib/bs4/bs4/tests/test_html5lib.py b/lib/bs4/bs4/tests/test_html5lib.py deleted file mode 100644 index 8e3cba68..00000000 --- a/lib/bs4/bs4/tests/test_html5lib.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Tests to ensure that the html5lib tree builder generates good trees.""" - -import warnings - -try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True -except ImportError, e: - HTML5LIB_PRESENT = False -from bs4.element import SoupStrainer -from bs4.testing import ( - HTML5TreeBuilderSmokeTest, - SoupTest, - skipIf, -) - -@skipIf( - not HTML5LIB_PRESENT, - "html5lib seems not to be present, not testing its tree builder.") -class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): - """See ``HTML5TreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return HTML5TreeBuilder() - - def test_soupstrainer(self): - # The html5lib tree builder does not support SoupStrainers. - strainer = SoupStrainer("b") - markup = "
<p>A <b>bold</b> statement.</p>
" - with warnings.catch_warnings(record=True) as w: - soup = self.soup(markup, parse_only=strainer) - self.assertEqual( - soup.decode(), self.document_for(markup)) - - self.assertTrue( - "the html5lib tree builder doesn't support parse_only" in - str(w[0].message)) - - def test_correctly_nested_tables(self): - """html5lib inserts <tbody> tags where other parsers don't.""" - markup = ('<table id="1">' - '<tr>' - "<td>
Here's another table:" - '<table id="2">' - '<tr><td>foo</td></tr>' - '</table></td>') - - self.assertSoupEquals( - markup, - '<table id="1"><tbody><tr><td>Here\'s another table:' - '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>' - '</td></tr></tbody></table>') - - self.assertSoupEquals( - "<table><thead><tr><td>Foo</td></tr></thead>" - "<tbody><tr><td>Bar</td></tr></tbody>" - "<tfoot><tr><td>Baz</td></tr></tfoot></table>
") - - def test_xml_declaration_followed_by_doctype(self): - markup = '''<?xml version="1.0" encoding="utf-8"?> -<!DOCTYPE html> -<html> -  <head> -  </head> -  <body> -   <p>foo</p> -  </body> -</html>''' - soup = self.soup(markup) - # Verify that we can reach the <p> tag; this means the tree is connected. - self.assertEqual(b"<p>foo</p>
", soup.p.encode()) - - def test_reparented_markup(self): - markup = '
<p><em>foo</p>\n<p>bar<a></a></em></p>
' - soup = self.soup(markup) - self.assertEqual(u"
<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>
", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - - def test_reparented_markup_ends_with_whitespace(self): - markup = '
<p><em>foo</p>\n<p>bar<a></a></em></p>
\n' - soup = self.soup(markup) - self.assertEqual(u"
<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>
\n</body>", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - def test_reparented_markup_containing_identical_whitespace_nodes(self): - """Verify that we keep the two whitespace nodes in this - document distinct when reparenting the adjacent <tbody> tags. - """ - markup = '<table> <tbody><tbody><ims></tbody> </table>
' - soup = self.soup(markup) - space1, space2 = soup.find_all(string=' ') - tbody1, tbody2 = soup.find_all('tbody') - assert space1.next_element is tbody1 - assert tbody2.next_element is space2 - - def test_processing_instruction(self): - """Processing instructions become comments.""" - markup = b"""<?PITarget PIContent?>""" - soup = self.soup(markup) - assert str(soup).startswith("<!--?PITarget PIContent?-->") - - def test_cloned_multivalue_node(self): - markup = b"""
<a class="my_class"><p></a>
""" - soup = self.soup(markup) - a1, a2 = soup.find_all('a') - self.assertEqual(a1, a2) - assert a1 is not a2 diff --git a/lib/bs4/bs4/tests/test_htmlparser.py b/lib/bs4/bs4/tests/test_htmlparser.py deleted file mode 100644 index b45e35f9..00000000 --- a/lib/bs4/bs4/tests/test_htmlparser.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Tests to ensure that the html.parser tree builder generates good -trees.""" - -from pdb import set_trace -import pickle -from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest -from bs4.builder import HTMLParserTreeBuilder - -class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - - @property - def default_builder(self): - return HTMLParserTreeBuilder() - - def test_namespaced_system_doctype(self): - # html.parser can't handle namespaced doctypes, so skip this one. - pass - - def test_namespaced_public_doctype(self): - # html.parser can't handle namespaced doctypes, so skip this one. - pass - - def test_builder_is_pickled(self): - """Unlike most tree builders, HTMLParserTreeBuilder and will - be restored after pickling. - """ - tree = self.soup("foo") - dumped = pickle.dumps(tree, 2) - loaded = pickle.loads(dumped) - self.assertTrue(isinstance(loaded.builder, type(tree.builder))) - - diff --git a/lib/bs4/bs4/tests/test_lxml.py b/lib/bs4/bs4/tests/test_lxml.py deleted file mode 100644 index a05870b9..00000000 --- a/lib/bs4/bs4/tests/test_lxml.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Tests to ensure that the lxml tree builder generates good trees.""" - -import re -import warnings - -try: - import lxml.etree - LXML_PRESENT = True - LXML_VERSION = lxml.etree.LXML_VERSION -except ImportError, e: - LXML_PRESENT = False - LXML_VERSION = (0,) - -if LXML_PRESENT: - from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML - -from bs4 import ( - BeautifulSoup, - BeautifulStoneSoup, - ) -from bs4.element import Comment, Doctype, SoupStrainer -from bs4.testing import skipIf -from bs4.tests import test_htmlparser -from bs4.testing import ( - HTMLTreeBuilderSmokeTest, - XMLTreeBuilderSmokeTest, - SoupTest, - skipIf, -) - -@skipIf( - not LXML_PRESENT, - "lxml seems not to be present, not testing its tree builder.") -class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - """See ``HTMLTreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return LXMLTreeBuilder() - - def test_out_of_range_entity(self): - self.assertSoupEquals( - "
<p>foo&#10000000000000;bar</p>", "<p>foobar</p>
") - self.assertSoupEquals( - "
<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>
") - self.assertSoupEquals( - "
<p>foo&#1000000000;bar</p>", "<p>foobar</p>
") - - # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this - # test if an old version of lxml is installed. - - @skipIf( - not LXML_PRESENT or LXML_VERSION < (2,3,5,0), - "Skipping doctype test for old version of lxml to avoid segfault.") - def test_empty_doctype(self): - soup = self.soup("") - doctype = soup.contents[0] - self.assertEqual("", doctype.strip()) - - def test_beautifulstonesoup_is_xml_parser(self): - # Make sure that the deprecated BSS class uses an xml builder - # if one is installed. - with warnings.catch_warnings(record=True) as w: - soup = BeautifulStoneSoup("") - self.assertEqual(u"", unicode(soup.b)) - self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) - -@skipIf( - not LXML_PRESENT, - "lxml seems not to be present, not testing its XML tree builder.") -class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): - """See ``HTMLTreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return LXMLTreeBuilderForXML() diff --git a/lib/bs4/bs4/tests/test_soup.py b/lib/bs4/bs4/tests/test_soup.py deleted file mode 100644 index f3e69edf..00000000 --- a/lib/bs4/bs4/tests/test_soup.py +++ /dev/null @@ -1,501 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests of Beautiful Soup as a whole.""" - -from pdb import set_trace -import logging -import unittest -import sys -import tempfile - -from bs4 import ( - BeautifulSoup, - BeautifulStoneSoup, -) -from bs4.element import ( - CharsetMetaAttributeValue, - ContentMetaAttributeValue, - SoupStrainer, - NamespacedAttribute, - ) -import bs4.dammit -from bs4.dammit import ( - EntitySubstitution, - UnicodeDammit, - EncodingDetector, -) -from bs4.testing import ( - SoupTest, - skipIf, -) -import warnings - -try: - from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML - LXML_PRESENT = True -except ImportError, e: - LXML_PRESENT = False - -PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) - -class TestConstructor(SoupTest): - - def test_short_unicode_input(self): - data = u"
<html><h1>éé</h1></html>
" - soup = self.soup(data) - self.assertEqual(u"éé", soup.h1.string) - - def test_embedded_null(self): - data = u"
<html><h1>foo\0bar</h1></html>
" - soup = self.soup(data) - self.assertEqual(u"foo\0bar", soup.h1.string) - - def test_exclude_encodings(self): - utf8_data = u"Räksmörgås".encode("utf-8") - soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) - self.assertEqual("windows-1252", soup.original_encoding) - - -class TestWarnings(SoupTest): - - def _no_parser_specified(self, s, is_there=True): - v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80]) - self.assertTrue(v) - - def test_warning_if_no_parser_specified(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("") - msg = str(w[0].message) - self._assert_no_parser_specified(msg) - - def test_warning_if_parser_specified_too_vague(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("", "html") - msg = str(w[0].message) - self._assert_no_parser_specified(msg) - - def test_no_warning_if_explicit_parser_specified(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("", "html.parser") - self.assertEqual([], w) - - def test_parseOnlyThese_renamed_to_parse_only(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("", parseOnlyThese=SoupStrainer("b")) - msg = str(w[0].message) - self.assertTrue("parseOnlyThese" in msg) - self.assertTrue("parse_only" in msg) - self.assertEqual(b"", soup.encode()) - - def test_fromEncoding_renamed_to_from_encoding(self): - with warnings.catch_warnings(record=True) as w: - utf8 = b"\xc3\xa9" - soup = self.soup(utf8, fromEncoding="utf8") - msg = str(w[0].message) - self.assertTrue("fromEncoding" in msg) - self.assertTrue("from_encoding" in msg) - self.assertEqual("utf8", soup.original_encoding) - - def test_unrecognized_keyword_argument(self): - self.assertRaises( - TypeError, self.soup, "", no_such_argument=True) - -class TestWarnings(SoupTest): - - def test_disk_file_warning(self): - filehandle = tempfile.NamedTemporaryFile() - filename = filehandle.name - try: - with warnings.catch_warnings(record=True) as w: - soup = self.soup(filename) - msg = str(w[0].message) - self.assertTrue("looks like a filename" in msg) - finally: - filehandle.close() - - # The file no longer exists, so Beautiful Soup will no longer issue the warning. - with warnings.catch_warnings(record=True) as w: - soup = self.soup(filename) - self.assertEqual(0, len(w)) - - def test_url_warning_with_bytes_url(https://github.com/self): - with warnings.catch_warnings(record=True) as warning_list: - soup = self.soup(b"http://www.crummybytes.com/") - # Be aware this isn't the only warning that can be raised during - # execution.. 
- self.assertTrue(any("looks like a URL" in str(w.message) - for w in warning_list)) - - def test_url_warning_with_unicode_url(https://github.com/self): - with warnings.catch_warnings(record=True) as warning_list: - # note - this url must differ from the bytes one otherwise - # python's warnings system swallows the second warning - soup = self.soup(u"http://www.crummyunicode.com/") - self.assertTrue(any("looks like a URL" in str(w.message) - for w in warning_list)) - - def test_url_warning_with_bytes_and_space(self): - with warnings.catch_warnings(record=True) as warning_list: - soup = self.soup(b"http://www.crummybytes.com/%20is%20great") - self.assertFalse(any("looks like a URL" in str(w.message) - for w in warning_list)) - - def test_url_warning_with_unicode_and_space(self): - with warnings.catch_warnings(record=True) as warning_list: - soup = self.soup(u"http://www.crummyuncode.com/%20is%20great") - self.assertFalse(any("looks like a URL" in str(w.message) - for w in warning_list)) - - -class TestSelectiveParsing(SoupTest): - - def test_parse_with_soupstrainer(self): - markup = "NoYesNoYes Yes" - strainer = SoupStrainer("b") - soup = self.soup(markup, parse_only=strainer) - self.assertEqual(soup.encode(), b"YesYes Yes") - - -class TestEntitySubstitution(unittest.TestCase): - """Standalone tests of the EntitySubstitution class.""" - def setUp(self): - self.sub = EntitySubstitution - - def test_simple_html_substitution(self): - # Unicode characters corresponding to named HTML entites - # are substituted, and no others. - s = u"foo\u2200\N{SNOWMAN}\u00f5bar" - self.assertEqual(self.sub.substitute_html(s), - u"foo∀\N{SNOWMAN}õbar") - - def test_smart_quote_substitution(self): - # MS smart quotes are a common source of frustration, so we - # give them a special test. 
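The two-step conversion this test covers can be sketched standalone with the same bs4 classes (byte values mirrored from the assertions):

    from bs4.dammit import EntitySubstitution, UnicodeDammit

    # Windows-1252 smart-quote bytes decode to Unicode curly quotes...
    dammit = UnicodeDammit(b"\x91\x92foo\x93\x94")
    assert dammit.unicode_markup == u"\u2018\u2019foo\u201c\u201d"

    # ...which substitute_html() then writes out as named HTML entities.
    assert EntitySubstitution.substitute_html(dammit.markup) == "&lsquo;&rsquo;foo&ldquo;&rdquo;"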
- quotes = b"\x91\x92foo\x93\x94" - dammit = UnicodeDammit(quotes) - self.assertEqual(self.sub.substitute_html(dammit.markup), - "‘’foo“”") - - def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self): - s = 'Welcome to "my bar"' - self.assertEqual(self.sub.substitute_xml(s, False), s) - - def test_xml_attribute_quoting_normally_uses_double_quotes(self): - self.assertEqual(self.sub.substitute_xml("Welcome", True), - '"Welcome"') - self.assertEqual(self.sub.substitute_xml("Bob's Bar", True), - '"Bob\'s Bar"') - - def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self): - s = 'Welcome to "my bar"' - self.assertEqual(self.sub.substitute_xml(s, True), - "'Welcome to \"my bar\"'") - - def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self): - s = 'Welcome to "Bob\'s Bar"' - self.assertEqual( - self.sub.substitute_xml(s, True), - '"Welcome to "Bob\'s Bar""') - - def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self): - quoted = 'Welcome to "Bob\'s Bar"' - self.assertEqual(self.sub.substitute_xml(quoted), quoted) - - def test_xml_quoting_handles_angle_brackets(self): - self.assertEqual( - self.sub.substitute_xml("foo"), - "foo<bar>") - - def test_xml_quoting_handles_ampersands(self): - self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&T") - - def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self): - self.assertEqual( - self.sub.substitute_xml("ÁT&T"), - "&Aacute;T&T") - - def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self): - self.assertEqual( - self.sub.substitute_xml_containing_entities("ÁT&T"), - "ÁT&T") - - def test_quotes_not_html_substituted(self): - """There's no need to do this except inside attribute values.""" - text = 'Bob\'s "bar"' - self.assertEqual(self.sub.substitute_html(text), text) - - -class TestEncodingConversion(SoupTest): - # Test Beautiful Soup's ability to decode and encode from various - # encodings. - - def setUp(self): - super(TestEncodingConversion, self).setUp() - self.unicode_data = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - self.utf8_data = self.unicode_data.encode("utf-8") - # Just so you know what it looks like. - self.assertEqual( - self.utf8_data, - b'Sacr\xc3\xa9 bleu!') - - def test_ascii_in_unicode_out(self): - # ASCII input is converted to Unicode. The original_encoding - # attribute is set to 'utf-8', a superset of ASCII. - chardet = bs4.dammit.chardet_dammit - logging.disable(logging.WARNING) - try: - def noop(str): - return None - # Disable chardet, which will realize that the ASCII is ASCII. - bs4.dammit.chardet_dammit = noop - ascii = b"a" - soup_from_ascii = self.soup(ascii) - unicode_output = soup_from_ascii.decode() - self.assertTrue(isinstance(unicode_output, unicode)) - self.assertEqual(unicode_output, self.document_for(ascii.decode())) - self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8") - finally: - logging.disable(logging.NOTSET) - bs4.dammit.chardet_dammit = chardet - - def test_unicode_in_unicode_out(self): - # Unicode input is left alone. The original_encoding attribute - # is not set. - soup_from_unicode = self.soup(self.unicode_data) - self.assertEqual(soup_from_unicode.decode(), self.unicode_data) - self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') - self.assertEqual(soup_from_unicode.original_encoding, None) - - def test_utf8_in_unicode_out(self): - # UTF-8 input is converted to Unicode. 
The original_encoding - # attribute is set. - soup_from_utf8 = self.soup(self.utf8_data) - self.assertEqual(soup_from_utf8.decode(), self.unicode_data) - self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') - - def test_utf8_out(self): - # The internal data structures can be encoded as UTF-8. - soup_from_unicode = self.soup(self.unicode_data) - self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data) - - @skipIf( - PYTHON_3_PRE_3_2, - "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") - def test_attribute_name_containing_unicode_characters(self): - markup = u'
' - self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8")) - -class TestUnicodeDammit(unittest.TestCase): - """Standalone tests of UnicodeDammit.""" - - def test_unicode_input(self): - markup = u"I'm already Unicode! \N{SNOWMAN}" - dammit = UnicodeDammit(markup) - self.assertEqual(dammit.unicode_markup, markup) - - def test_smart_quotes_to_unicode(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup) - self.assertEqual( - dammit.unicode_markup, u"\u2018\u2019\u201c\u201d") - - def test_smart_quotes_to_xml_entities(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup, smart_quotes_to="xml") - self.assertEqual( - dammit.unicode_markup, "‘’“”") - - def test_smart_quotes_to_html_entities(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup, smart_quotes_to="html") - self.assertEqual( - dammit.unicode_markup, "‘’“”") - - def test_smart_quotes_to_ascii(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup, smart_quotes_to="ascii") - self.assertEqual( - dammit.unicode_markup, """''""""") - - def test_detect_utf8(self): - utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83" - dammit = UnicodeDammit(utf8) - self.assertEqual(dammit.original_encoding.lower(), 'utf-8') - self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}') - - - def test_convert_hebrew(self): - hebrew = b"\xed\xe5\xec\xf9" - dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) - self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8') - self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9') - - def test_dont_see_smart_quotes_where_there_are_none(self): - utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" - dammit = UnicodeDammit(utf_8) - self.assertEqual(dammit.original_encoding.lower(), 'utf-8') - self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8) - - def test_ignore_inappropriate_codecs(self): - utf8_data = u"Räksmörgås".encode("utf-8") - dammit = UnicodeDammit(utf8_data, ["iso-8859-8"]) - self.assertEqual(dammit.original_encoding.lower(), 'utf-8') - - def test_ignore_invalid_codecs(self): - utf8_data = u"Räksmörgås".encode("utf-8") - for bad_encoding in ['.utf8', '...', 'utF---16.!']: - dammit = UnicodeDammit(utf8_data, [bad_encoding]) - self.assertEqual(dammit.original_encoding.lower(), 'utf-8') - - def test_exclude_encodings(self): - # This is UTF-8. - utf8_data = u"Räksmörgås".encode("utf-8") - - # But if we exclude UTF-8 from consideration, the guess is - # Windows-1252. - dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"]) - self.assertEqual(dammit.original_encoding.lower(), 'windows-1252') - - # And if we exclude that, there is no valid guess at all. - dammit = UnicodeDammit( - utf8_data, exclude_encodings=["utf-8", "windows-1252"]) - self.assertEqual(dammit.original_encoding, None) - - def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self): - detected = EncodingDetector( - b'') - encodings = list(detected.encodings) - assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings - - def test_detect_html5_style_meta_tag(self): - - for data in ( - b'', - b"", - b"", - b""): - dammit = UnicodeDammit(data, is_html=True) - self.assertEqual( - "euc-jp", dammit.original_encoding) - - def test_last_ditch_entity_replacement(self): - # This is a UTF-8 document that contains bytestrings - # completely incompatible with UTF-8 (ie. encoded with some other - # encoding). 
- # - # Since there is no consistent encoding for the document, - # Unicode, Dammit will eventually encode the document as UTF-8 - # and encode the incompatible characters as REPLACEMENT - # CHARACTER. - # - # If chardet is installed, it will detect that the document - # can be converted into ISO-8859-1 without errors. This happens - # to be the wrong encoding, but it is a consistent encoding, so the - # code we're testing here won't run. - # - # So we temporarily disable chardet if it's present. - doc = b"""\357\273\277 -\330\250\330\252\330\261 -\310\322\321\220\312\321\355\344""" - chardet = bs4.dammit.chardet_dammit - logging.disable(logging.WARNING) - try: - def noop(str): - return None - bs4.dammit.chardet_dammit = noop - dammit = UnicodeDammit(doc) - self.assertEqual(True, dammit.contains_replacement_characters) - self.assertTrue(u"\ufffd" in dammit.unicode_markup) - - soup = BeautifulSoup(doc, "html.parser") - self.assertTrue(soup.contains_replacement_characters) - finally: - logging.disable(logging.NOTSET) - bs4.dammit.chardet_dammit = chardet - - def test_byte_order_mark_removed(self): - # A document written in UTF-16LE will have its byte order marker stripped. - data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' - dammit = UnicodeDammit(data) - self.assertEqual(u"áé", dammit.unicode_markup) - self.assertEqual("utf-16le", dammit.original_encoding) - - def test_detwingle(self): - # Here's a UTF8 document. - utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8") - - # Here's a Windows-1252 document. - windows_1252 = ( - u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" - u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") - - # Through some unholy alchemy, they've been stuck together. - doc = utf8 + windows_1252 + utf8 - - # The document can't be turned into UTF-8: - self.assertRaises(UnicodeDecodeError, doc.decode, "utf8") - - # Unicode, Dammit thinks the whole document is Windows-1252, - # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃" - - # But if we run it through fix_embedded_windows_1252, it's fixed: - - fixed = UnicodeDammit.detwingle(doc) - self.assertEqual( - u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8")) - - def test_detwingle_ignores_multibyte_characters(self): - # Each of these characters has a UTF-8 representation ending - # in \x93. \x93 is a smart quote if interpreted as - # Windows-1252. But our code knows to skip over multibyte - # UTF-8 characters, so they'll survive the process unscathed. - for tricky_unicode_char in ( - u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' - u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' - u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one. - ): - input = tricky_unicode_char.encode("utf8") - self.assertTrue(input.endswith(b'\x93')) - output = UnicodeDammit.detwingle(input) - self.assertEqual(output, input) - -class TestNamedspacedAttribute(SoupTest): - - def test_name_may_be_none(self): - a = NamespacedAttribute("xmlns", None) - self.assertEqual(a, "xmlns") - - def test_attribute_is_equivalent_to_colon_separated_string(self): - a = NamespacedAttribute("a", "b") - self.assertEqual("a:b", a) - - def test_attributes_are_equivalent_if_prefix_and_name_identical(self): - a = NamespacedAttribute("a", "b", "c") - b = NamespacedAttribute("a", "b", "c") - self.assertEqual(a, b) - - # The actual namespace is not considered. - c = NamespacedAttribute("a", "b", None) - self.assertEqual(a, c) - - # But name and prefix are important. 
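A minimal standalone sketch of the equality rule stated in the comment above:

    from bs4.element import NamespacedAttribute

    a = NamespacedAttribute("a", "b", "c")
    # Equal when prefix and name match; the namespace itself is ignored.
    assert a == NamespacedAttribute("a", "b", None)
    assert a != NamespacedAttribute("a", "z", "c")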
- d = NamespacedAttribute("a", "z", "c") - self.assertNotEqual(a, d) - - e = NamespacedAttribute("z", "b", "c") - self.assertNotEqual(a, e) - - -class TestAttributeValueWithCharsetSubstitution(unittest.TestCase): - - def test_content_meta_attribute_value(self): - value = CharsetMetaAttributeValue("euc-jp") - self.assertEqual("euc-jp", value) - self.assertEqual("euc-jp", value.original_value) - self.assertEqual("utf8", value.encode("utf8")) - - - def test_content_meta_attribute_value(self): - value = ContentMetaAttributeValue("text/html; charset=euc-jp") - self.assertEqual("text/html; charset=euc-jp", value) - self.assertEqual("text/html; charset=euc-jp", value.original_value) - self.assertEqual("text/html; charset=utf8", value.encode("utf8")) diff --git a/lib/bs4/bs4/tests/test_tree.py b/lib/bs4/bs4/tests/test_tree.py deleted file mode 100644 index a4fe0b16..00000000 --- a/lib/bs4/bs4/tests/test_tree.py +++ /dev/null @@ -1,2044 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests for Beautiful Soup's tree traversal methods. - -The tree traversal methods are the main advantage of using Beautiful -Soup over just using a parser. - -Different parsers will build different Beautiful Soup trees given the -same markup, but all Beautiful Soup trees can be traversed with the -methods tested here. -""" - -from pdb import set_trace -import copy -import pickle -import re -import warnings -from bs4 import BeautifulSoup -from bs4.builder import ( - builder_registry, - HTMLParserTreeBuilder, -) -from bs4.element import ( - PY3K, - CData, - Comment, - Declaration, - Doctype, - NavigableString, - SoupStrainer, - Tag, -) -from bs4.testing import ( - SoupTest, - skipIf, -) - -XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None) -LXML_PRESENT = (builder_registry.lookup("lxml") is not None) - -class TreeTest(SoupTest): - - def assertSelects(self, tags, should_match): - """Make sure that the given tags have the correct text. - - This is used in tests that define a bunch of tags, each - containing a single string, and then select certain strings by - some mechanism. - """ - self.assertEqual([tag.string for tag in tags], should_match) - - def assertSelectsIDs(self, tags, should_match): - """Make sure that the given tags have the correct IDs. - - This is used in tests that define a bunch of tags, each - containing a single string, and then select certain strings by - some mechanism. - """ - self.assertEqual([tag['id'] for tag in tags], should_match) - - -class TestFind(TreeTest): - """Basic tests of the find() method. - - find() just calls find_all() with limit=1, so it's not tested all - that thouroughly here. - """ - - def test_find_tag(self): - soup = self.soup("1234") - self.assertEqual(soup.find("b").string, "2") - - def test_unicode_text_find(self): - soup = self.soup(u'
<h1>Räksmörgås</h1>
') - self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås') - - def test_unicode_attribute_find(self): - soup = self.soup(u'
<h1 id="Räksmörgås">here it is</h1>
') - str(soup) - self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text) - - - def test_find_everything(self): - """Test an optimization that finds all tags.""" - soup = self.soup("foobar") - self.assertEqual(2, len(soup.find_all())) - - def test_find_everything_with_name(self): - """Test an optimization that finds all tags with a given name.""" - soup = self.soup("foobarbaz") - self.assertEqual(2, len(soup.find_all('a'))) - -class TestFindAll(TreeTest): - """Basic tests of the find_all() method.""" - - def test_find_all_text_nodes(self): - """You can search the tree for text nodes.""" - soup = self.soup("Foobar\xbb") - # Exact match. - self.assertEqual(soup.find_all(string="bar"), [u"bar"]) - self.assertEqual(soup.find_all(text="bar"), [u"bar"]) - # Match any of a number of strings. - self.assertEqual( - soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) - # Match a regular expression. - self.assertEqual(soup.find_all(text=re.compile('.*')), - [u"Foo", u"bar", u'\xbb']) - # Match anything. - self.assertEqual(soup.find_all(text=True), - [u"Foo", u"bar", u'\xbb']) - - def test_find_all_limit(self): - """You can limit the number of items returned by find_all.""" - soup = self.soup("12345") - self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"]) - self.assertSelects(soup.find_all('a', limit=1), ["1"]) - self.assertSelects( - soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"]) - - # A limit of 0 means no limit. - self.assertSelects( - soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"]) - - def test_calling_a_tag_is_calling_findall(self): - soup = self.soup("123") - self.assertSelects(soup('a', limit=1), ["1"]) - self.assertSelects(soup.b(id="foo"), ["3"]) - - def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self): - soup = self.soup("") - # Create a self-referential list. - l = [] - l.append(l) - - # Without special code in _normalize_search_value, this would cause infinite - # recursion. - self.assertEqual([], soup.find_all(l)) - - def test_find_all_resultset(self): - """All find_all calls return a ResultSet""" - soup = self.soup("") - result = soup.find_all("a") - self.assertTrue(hasattr(result, "source")) - - result = soup.find_all(True) - self.assertTrue(hasattr(result, "source")) - - result = soup.find_all(text="foo") - self.assertTrue(hasattr(result, "source")) - - -class TestFindAllBasicNamespaces(TreeTest): - - def test_find_by_namespaced_name(self): - soup = self.soup('4') - self.assertEqual("4", soup.find("mathml:msqrt").string) - self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name) - - -class TestFindAllByName(TreeTest): - """Test ways of finding tags by tag name.""" - - def setUp(self): - super(TreeTest, self).setUp() - self.tree = self.soup("""First tag. - Second tag. - Third Nested tag. tag.""") - - def test_find_all_by_tag_name(self): - # Find all the tags. - self.assertSelects( - self.tree.find_all('a'), ['First tag.', 'Nested tag.']) - - def test_find_all_by_name_and_text(self): - self.assertSelects( - self.tree.find_all('a', text='First tag.'), ['First tag.']) - - self.assertSelects( - self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.']) - - self.assertSelects( - self.tree.find_all('a', text=re.compile("tag")), - ['First tag.', 'Nested tag.']) - - - def test_find_all_on_non_root_element(self): - # You can call find_all on any node, not just the root. 
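A minimal standalone sketch of the point made in the comment above (markup is illustrative, mirroring the fixture):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<a>First</a><c><a>Nested</a></c>", "html.parser")
    # Searching from soup.c sees only that subtree, not the whole document.
    print([a.string for a in soup.c.find_all('a')])  # [u'Nested']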
- self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.']) - - def test_calling_element_invokes_find_all(self): - self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.']) - - def test_find_all_by_tag_strainer(self): - self.assertSelects( - self.tree.find_all(SoupStrainer('a')), - ['First tag.', 'Nested tag.']) - - def test_find_all_by_tag_names(self): - self.assertSelects( - self.tree.find_all(['a', 'b']), - ['First tag.', 'Second tag.', 'Nested tag.']) - - def test_find_all_by_tag_dict(self): - self.assertSelects( - self.tree.find_all({'a' : True, 'b' : True}), - ['First tag.', 'Second tag.', 'Nested tag.']) - - def test_find_all_by_tag_re(self): - self.assertSelects( - self.tree.find_all(re.compile('^[ab]$')), - ['First tag.', 'Second tag.', 'Nested tag.']) - - def test_find_all_with_tags_matching_method(self): - # You can define an oracle method that determines whether - # a tag matches the search. - def id_matches_name(tag): - return tag.name == tag.get('id') - - tree = self.soup("""Match 1. - Does not match. - Match 2.""") - - self.assertSelects( - tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) - - def test_find_with_multi_valued_attribute(self): - soup = self.soup( - "
<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>
" - ) - r1 = soup.find('div', 'a d'); - r2 = soup.find('div', re.compile(r'a d')); - r3, r4 = soup.find_all('div', ['a b', 'a d']); - self.assertEqual('3', r1.string) - self.assertEqual('3', r2.string) - self.assertEqual('1', r3.string) - self.assertEqual('3', r4.string) - -class TestFindAllByAttribute(TreeTest): - - def test_find_all_by_attribute_name(self): - # You can pass in keyword arguments to find_all to search by - # attribute. - tree = self.soup(""" - Matching a. - - Non-matching Matching b.a. - """) - self.assertSelects(tree.find_all(id='first'), - ["Matching a.", "Matching b."]) - - def test_find_all_by_utf8_attribute_value(self): - peace = u"םולש".encode("utf8") - data = u''.encode("utf8") - soup = self.soup(data) - self.assertEqual([soup.a], soup.find_all(title=peace)) - self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) - self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"])) - - def test_find_all_by_attribute_dict(self): - # You can pass in a dictionary as the argument 'attrs'. This - # lets you search for attributes like 'name' (a fixed argument - # to find_all) and 'class' (a reserved word in Python.) - tree = self.soup(""" - Name match. - Class match. - Non-match. - A tag called 'name1'. - """) - - # This doesn't do what you want. - self.assertSelects(tree.find_all(name='name1'), - ["A tag called 'name1'."]) - # This does what you want. - self.assertSelects(tree.find_all(attrs={'name' : 'name1'}), - ["Name match."]) - - self.assertSelects(tree.find_all(attrs={'class' : 'class2'}), - ["Class match."]) - - def test_find_all_by_class(self): - tree = self.soup(""" - Class 1. - Class 2. - Class 1. - Class 3 and 4. - """) - - # Passing in the class_ keyword argument will search against - # the 'class' attribute. - self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.']) - self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.']) - self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.']) - - # Passing in a string to 'attrs' will also search the CSS class. - self.assertSelects(tree.find_all('a', '1'), ['Class 1.']) - self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.']) - self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.']) - self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.']) - - def test_find_by_class_when_multiple_classes_present(self): - tree = self.soup("Found it") - - f = tree.find_all("gar", class_=re.compile("o")) - self.assertSelects(f, ["Found it"]) - - f = tree.find_all("gar", class_=re.compile("a")) - self.assertSelects(f, ["Found it"]) - - # If the search fails to match the individual strings "foo" and "bar", - # it will be tried against the combined string "foo bar". 
- f = tree.find_all("gar", class_=re.compile("o b")) - self.assertSelects(f, ["Found it"]) - - def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): - soup = self.soup("Found it") - - self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"]) - - def big_attribute_value(value): - return len(value) > 3 - - self.assertSelects(soup.find_all("a", big_attribute_value), []) - - def small_attribute_value(value): - return len(value) <= 3 - - self.assertSelects( - soup.find_all("a", small_attribute_value), ["Found it"]) - - def test_find_all_with_string_for_attrs_finds_multiple_classes(self): - soup = self.soup('') - a, a2 = soup.find_all("a") - self.assertEqual([a, a2], soup.find_all("a", "foo")) - self.assertEqual([a], soup.find_all("a", "bar")) - - # If you specify the class as a string that contains a - # space, only that specific value will be found. - self.assertEqual([a], soup.find_all("a", class_="foo bar")) - self.assertEqual([a], soup.find_all("a", "foo bar")) - self.assertEqual([], soup.find_all("a", "bar foo")) - - def test_find_all_by_attribute_soupstrainer(self): - tree = self.soup(""" - Match. - Non-match.""") - - strainer = SoupStrainer(attrs={'id' : 'first'}) - self.assertSelects(tree.find_all(strainer), ['Match.']) - - def test_find_all_with_missing_attribute(self): - # You can pass in None as the value of an attribute to find_all. - # This will match tags that do not have that attribute set. - tree = self.soup("""ID present. - No ID present. - ID is empty.""") - self.assertSelects(tree.find_all('a', id=None), ["No ID present."]) - - def test_find_all_with_defined_attribute(self): - # You can pass in None as the value of an attribute to find_all. - # This will match tags that have that attribute set to any value. - tree = self.soup("""ID present. - No ID present. - ID is empty.""") - self.assertSelects( - tree.find_all(id=True), ["ID present.", "ID is empty."]) - - def test_find_all_with_numeric_attribute(self): - # If you search for a number, it's treated as a string. - tree = self.soup("""Unquoted attribute. - Quoted attribute.""") - - expected = ["Unquoted attribute.", "Quoted attribute."] - self.assertSelects(tree.find_all(id=1), expected) - self.assertSelects(tree.find_all(id="1"), expected) - - def test_find_all_with_list_attribute_values(self): - # You can pass a list of attribute values instead of just one, - # and you'll get tags that match any of the values. - tree = self.soup("""1 - 2 - 3 - No ID.""") - self.assertSelects(tree.find_all(id=["1", "3", "4"]), - ["1", "3"]) - - def test_find_all_with_regular_expression_attribute_value(self): - # You can pass a regular expression as an attribute value, and - # you'll get tags whose values for that attribute match the - # regular expression. - tree = self.soup("""One a. - Two as. - Mixed as and bs. - One b. 
- No ID.""") - - self.assertSelects(tree.find_all(id=re.compile("^a+$")), - ["One a.", "Two as."]) - - def test_find_by_name_and_containing_string(self): - soup = self.soup("foobarfoo") - a = soup.a - - self.assertEqual([a], soup.find_all("a", text="foo")) - self.assertEqual([], soup.find_all("a", text="bar")) - self.assertEqual([], soup.find_all("a", text="bar")) - - def test_find_by_name_and_containing_string_when_string_is_buried(self): - soup = self.soup("foofoo") - self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo")) - - def test_find_by_attribute_and_containing_string(self): - soup = self.soup('foofoo') - a = soup.a - - self.assertEqual([a], soup.find_all(id=2, text="foo")) - self.assertEqual([], soup.find_all(id=1, text="bar")) - - - - -class TestIndex(TreeTest): - """Test Tag.index""" - def test_index(self): - tree = self.soup("""
<div> - <a>Identical</a> - <b>Not identical</b> - <a>Identical</a> - - <c><d>Identical with child</d></c> - <B>Also not identical</B> - <c><d>Identical with child</d></c> - </div>
""") - div = tree.div - for i, element in enumerate(div.contents): - self.assertEqual(i, div.index(element)) - self.assertRaises(ValueError, tree.index, 1) - - -class TestParentOperations(TreeTest): - """Test navigation and searching through an element's parents.""" - - def setUp(self): - super(TestParentOperations, self).setUp() - self.tree = self.soup('''
<ul id="top"> - <ul id="middle"> - <ul id="bottom"> - <b>Start here</b> - </ul> - </ul>
      ''') - self.start = self.tree.b - - - def test_parent(self): - self.assertEqual(self.start.parent['id'], 'bottom') - self.assertEqual(self.start.parent.parent['id'], 'middle') - self.assertEqual(self.start.parent.parent.parent['id'], 'top') - - def test_parent_of_top_tag_is_soup_object(self): - top_tag = self.tree.contents[0] - self.assertEqual(top_tag.parent, self.tree) - - def test_soup_object_has_no_parent(self): - self.assertEqual(None, self.tree.parent) - - def test_find_parents(self): - self.assertSelectsIDs( - self.start.find_parents('ul'), ['bottom', 'middle', 'top']) - self.assertSelectsIDs( - self.start.find_parents('ul', id="middle"), ['middle']) - - def test_find_parent(self): - self.assertEqual(self.start.find_parent('ul')['id'], 'bottom') - self.assertEqual(self.start.find_parent('ul', id='top')['id'], 'top') - - def test_parent_of_text_element(self): - text = self.tree.find(text="Start here") - self.assertEqual(text.parent.name, 'b') - - def test_text_element_find_parent(self): - text = self.tree.find(text="Start here") - self.assertEqual(text.find_parent('ul')['id'], 'bottom') - - def test_parent_generator(self): - parents = [parent['id'] for parent in self.start.parents - if parent is not None and 'id' in parent.attrs] - self.assertEqual(parents, ['bottom', 'middle', 'top']) - - -class ProximityTest(TreeTest): - - def setUp(self): - super(TreeTest, self).setUp() - self.tree = self.soup( - 'OneTwoThree') - - -class TestNextOperations(ProximityTest): - - def setUp(self): - super(TestNextOperations, self).setUp() - self.start = self.tree.b - - def test_next(self): - self.assertEqual(self.start.next_element, "One") - self.assertEqual(self.start.next_element.next_element['id'], "2") - - def test_next_of_last_item_is_none(self): - last = self.tree.find(text="Three") - self.assertEqual(last.next_element, None) - - def test_next_of_root_is_none(self): - # The document root is outside the next/previous chain. - self.assertEqual(self.tree.next_element, None) - - def test_find_all_next(self): - self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"]) - self.start.find_all_next(id=3) - self.assertSelects(self.start.find_all_next(id=3), ["Three"]) - - def test_find_next(self): - self.assertEqual(self.start.find_next('b')['id'], '2') - self.assertEqual(self.start.find_next(text="Three"), "Three") - - def test_find_next_for_text_element(self): - text = self.tree.find(text="One") - self.assertEqual(text.find_next("b").string, "Two") - self.assertSelects(text.find_all_next("b"), ["Two", "Three"]) - - def test_next_generator(self): - start = self.tree.find(text="Two") - successors = [node for node in start.next_elements] - # There are two successors: the final tag and its text contents. - tag, contents = successors - self.assertEqual(tag['id'], '3') - self.assertEqual(contents, "Three") - -class TestPreviousOperations(ProximityTest): - - def setUp(self): - super(TestPreviousOperations, self).setUp() - self.end = self.tree.find(text="Three") - - def test_previous(self): - self.assertEqual(self.end.previous_element['id'], "3") - self.assertEqual(self.end.previous_element.previous_element, "Two") - - def test_previous_of_first_item_is_none(self): - first = self.tree.find('html') - self.assertEqual(first.previous_element, None) - - def test_previous_of_root_is_none(self): - # The document root is outside the next/previous chain. - # XXX This is broken! 
- #self.assertEqual(self.tree.previous_element, None) - pass - - def test_find_all_previous(self): - # The tag containing the "Three" node is the predecessor - # of the "Three" node itself, which is why "Three" shows up - # here. - self.assertSelects( - self.end.find_all_previous('b'), ["Three", "Two", "One"]) - self.assertSelects(self.end.find_all_previous(id=1), ["One"]) - - def test_find_previous(self): - self.assertEqual(self.end.find_previous('b')['id'], '3') - self.assertEqual(self.end.find_previous(text="One"), "One") - - def test_find_previous_for_text_element(self): - text = self.tree.find(text="Three") - self.assertEqual(text.find_previous("b").string, "Three") - self.assertSelects( - text.find_all_previous("b"), ["Three", "Two", "One"]) - - def test_previous_generator(self): - start = self.tree.find(text="One") - predecessors = [node for node in start.previous_elements] - - # There are four predecessors: the tag containing "One" - # the tag, the tag, and the tag. - b, body, head, html = predecessors - self.assertEqual(b['id'], '1') - self.assertEqual(body.name, "body") - self.assertEqual(head.name, "head") - self.assertEqual(html.name, "html") - - -class SiblingTest(TreeTest): - - def setUp(self): - super(SiblingTest, self).setUp() - markup = ''' - - - - - - - - - - - ''' - # All that whitespace looks good but makes the tests more - # difficult. Get rid of it. - markup = re.compile("\n\s*").sub("", markup) - self.tree = self.soup(markup) - - -class TestNextSibling(SiblingTest): - - def setUp(self): - super(TestNextSibling, self).setUp() - self.start = self.tree.find(id="1") - - def test_next_sibling_of_root_is_none(self): - self.assertEqual(self.tree.next_sibling, None) - - def test_next_sibling(self): - self.assertEqual(self.start.next_sibling['id'], '2') - self.assertEqual(self.start.next_sibling.next_sibling['id'], '3') - - # Note the difference between next_sibling and next_element. - self.assertEqual(self.start.next_element['id'], '1.1') - - def test_next_sibling_may_not_exist(self): - self.assertEqual(self.tree.html.next_sibling, None) - - nested_span = self.tree.find(id="1.1") - self.assertEqual(nested_span.next_sibling, None) - - last_span = self.tree.find(id="4") - self.assertEqual(last_span.next_sibling, None) - - def test_find_next_sibling(self): - self.assertEqual(self.start.find_next_sibling('span')['id'], '2') - - def test_next_siblings(self): - self.assertSelectsIDs(self.start.find_next_siblings("span"), - ['2', '3', '4']) - - self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3']) - - def test_next_sibling_for_text_element(self): - soup = self.soup("Foobarbaz") - start = soup.find(text="Foo") - self.assertEqual(start.next_sibling.name, 'b') - self.assertEqual(start.next_sibling.next_sibling, 'baz') - - self.assertSelects(start.find_next_siblings('b'), ['bar']) - self.assertEqual(start.find_next_sibling(text="baz"), "baz") - self.assertEqual(start.find_next_sibling(text="nonesuch"), None) - - -class TestPreviousSibling(SiblingTest): - - def setUp(self): - super(TestPreviousSibling, self).setUp() - self.end = self.tree.find(id="4") - - def test_previous_sibling_of_root_is_none(self): - self.assertEqual(self.tree.previous_sibling, None) - - def test_previous_sibling(self): - self.assertEqual(self.end.previous_sibling['id'], '3') - self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2') - - # Note the difference between previous_sibling and previous_element. 
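A minimal sketch of the sibling/element distinction noted above, outside the fixture:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(
        '<span id="1"><span id="1.1"></span></span><span id="2"></span>',
        "html.parser")
    second = soup.find(id="2")
    print(second.previous_sibling['id'])  # 1   (same level of the tree)
    print(second.previous_element['id'])  # 1.1 (immediately before in document order)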
-        self.assertEqual(self.end.previous_element['id'], '3.1')
-
-    def test_previous_sibling_may_not_exist(self):
-        self.assertEqual(self.tree.html.previous_sibling, None)
-
-        nested_span = self.tree.find(id="1.1")
-        self.assertEqual(nested_span.previous_sibling, None)
-
-        first_span = self.tree.find(id="1")
-        self.assertEqual(first_span.previous_sibling, None)
-
-    def test_find_previous_sibling(self):
-        self.assertEqual(self.end.find_previous_sibling('span')['id'], '3')
-
-    def test_previous_siblings(self):
-        self.assertSelectsIDs(self.end.find_previous_siblings("span"),
-                              ['3', '2', '1'])
-
-        self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])
-
-    def test_previous_sibling_for_text_element(self):
-        soup = self.soup("Foo<b>bar</b>baz")
-        start = soup.find(text="baz")
-        self.assertEqual(start.previous_sibling.name, 'b')
-        self.assertEqual(start.previous_sibling.previous_sibling, 'Foo')
-
-        self.assertSelects(start.find_previous_siblings('b'), ['bar'])
-        self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo")
-        self.assertEqual(start.find_previous_sibling(text="nonesuch"), None)
-
-
-class TestTagCreation(SoupTest):
-    """Test the ability to create new tags."""
-    def test_new_tag(self):
-        soup = self.soup("")
-        new_tag = soup.new_tag("foo", bar="baz")
-        self.assertTrue(isinstance(new_tag, Tag))
-        self.assertEqual("foo", new_tag.name)
-        self.assertEqual(dict(bar="baz"), new_tag.attrs)
-        self.assertEqual(None, new_tag.parent)
-
-    def test_tag_inherits_self_closing_rules_from_builder(self):
-        if XML_BUILDER_PRESENT:
-            xml_soup = BeautifulSoup("", "lxml-xml")
-            xml_br = xml_soup.new_tag("br")
-            xml_p = xml_soup.new_tag("p")
-
-            # Both the <br> and <p> tags are empty-element, just because
-            # they have no contents.
-            self.assertEqual(b"<br/>", xml_br.encode())
-            self.assertEqual(b"<p/>", xml_p.encode())
-
-        html_soup = BeautifulSoup("", "html.parser")
-        html_br = html_soup.new_tag("br")
-        html_p = html_soup.new_tag("p")
-
-        # The HTML builder uses HTML's rules about which tags are
-        # empty-element tags, and the new tags reflect these rules.
-        self.assertEqual(b"<br/>", html_br.encode())
-        self.assertEqual(b"<p></p>", html_p.encode())
-
-    def test_new_string_creates_navigablestring(self):
-        soup = self.soup("")
-        s = soup.new_string("foo")
-        self.assertEqual("foo", s)
-        self.assertTrue(isinstance(s, NavigableString))
-
-    def test_new_string_can_create_navigablestring_subclass(self):
-        soup = self.soup("")
-        s = soup.new_string("foo", Comment)
-        self.assertEqual("foo", s)
-        self.assertTrue(isinstance(s, Comment))
-
-class TestTreeModification(SoupTest):
-
-    def test_attribute_modification(self):
-        soup = self.soup('<a id="1"></a>')
-        soup.a['id'] = 2
-        self.assertEqual(soup.decode(), self.document_for('<a id="2"></a>'))
-        del soup.a['id']
-        self.assertEqual(soup.decode(), self.document_for('<a></a>'))
-        soup.a['id2'] = 'foo'
-        self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
-
-    def test_new_tag_creation(self):
-        builder = builder_registry.lookup('html')()
-        soup = self.soup("<body></body>", builder=builder)
-        a = Tag(soup, builder, 'a')
-        ol = Tag(soup, builder, 'ol')
-        a['href'] = 'http://foo.com/'
-        soup.body.insert(0, a)
-        soup.body.insert(1, ol)
-        self.assertEqual(
-            soup.body.encode(),
-            b'<body><a href="http://foo.com/"></a><ol></ol></body>')
-
-    def test_append_to_contents_moves_tag(self):
-        doc = """<p id="1">Don't leave me <b>here</b>.</p>
-                <p id="2">Don\'t leave!</p>"""
-        soup = self.soup(doc)
-        second_para = soup.find(id='2')
-        bold = soup.b
-
-        # Move the <b> tag to the end of the second paragraph.
-        soup.find(id='2').append(soup.b)
-
-        # The <b> tag is now a child of the second paragraph.
-        self.assertEqual(bold.parent, second_para)
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                '<p id="1">Don\'t leave me .</p>\n'
-                '<p id="2">Don\'t leave!<b>here</b></p>'))
-
-    def test_replace_with_returns_thing_that_was_replaced(self):
-        text = "<a></a><b><c></c><d></d></b>"
-        soup = self.soup(text)
-        a = soup.a
-        new_a = a.replace_with(soup.c)
-        self.assertEqual(a, new_a)
-
-    def test_unwrap_returns_thing_that_was_replaced(self):
-        text = "<a><b></b><c></c></a>"
-        soup = self.soup(text)
-        a = soup.a
-        new_a = a.unwrap()
-        self.assertEqual(a, new_a)
-
-    def test_replace_with_and_unwrap_give_useful_exception_when_tag_has_no_parent(self):
-        soup = self.soup("<a><b>Foo</b></a><c>Bar</c>")
-        a = soup.a
-        a.extract()
-        self.assertEqual(None, a.parent)
-        self.assertRaises(ValueError, a.unwrap)
-        self.assertRaises(ValueError, a.replace_with, soup.c)
-
-    def test_replace_tag_with_itself(self):
-        text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
-        soup = self.soup(text)
-        c = soup.c
-        soup.c.replace_with(c)
-        self.assertEqual(soup.decode(), self.document_for(text))
-
-    def test_replace_tag_with_its_parent_raises_exception(self):
-        text = "<a><b></b></a>"
-        soup = self.soup(text)
-        self.assertRaises(ValueError, soup.b.replace_with, soup.a)
-
-    def test_insert_tag_into_itself_raises_exception(self):
-        text = "<a><b></b></a>"
-        soup = self.soup(text)
-        self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
-
-    def test_replace_with_maintains_next_element_throughout(self):
-        soup = self.soup('<p><a>one</a><b>three</b></p>')
-        a = soup.a
-        b = a.contents[0]
-        # Make it so the <a> tag has two text children.
-        a.insert(1, "two")
-
-        # Now replace each one with the empty string.
-        left, right = a.contents
-        left.replaceWith('')
-        right.replaceWith('')
-
-        # The <b> tag is still connected to the tree.
-        self.assertEqual("three", soup.b.string)
-
-    def test_replace_final_node(self):
-        soup = self.soup("<b>Argh!</b>")
-        soup.find(text="Argh!").replace_with("Hooray!")
-        new_text = soup.find(text="Hooray!")
-        b = soup.b
-        self.assertEqual(new_text.previous_element, b)
-        self.assertEqual(new_text.parent, b)
-        self.assertEqual(new_text.previous_element.next_element, new_text)
-        self.assertEqual(new_text.next_element, None)
-
-    def test_consecutive_text_nodes(self):
-        # A builder should never create two consecutive text nodes,
-        # but if you insert one next to another, Beautiful Soup will
-        # handle it correctly.
-        soup = self.soup("<a><b>Argh!</b><c></c></a>")
-        soup.b.insert(1, "Hooray!")
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<a><b>Argh!Hooray!</b><c></c></a>"))
-
-        new_text = soup.find(text="Hooray!")
-        self.assertEqual(new_text.previous_element, "Argh!")
-        self.assertEqual(new_text.previous_element.next_element, new_text)
-
-        self.assertEqual(new_text.previous_sibling, "Argh!")
-        self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
-
-        self.assertEqual(new_text.next_sibling, None)
-        self.assertEqual(new_text.next_element, soup.c)
-
-    def test_insert_string(self):
-        soup = self.soup("<a></a>")
-        soup.a.insert(0, "bar")
-        soup.a.insert(0, "foo")
-        # The strings were added to the tag.
-        self.assertEqual(["foo", "bar"], soup.a.contents)
-        # And they were converted to NavigableStrings.
-        self.assertEqual(soup.a.contents[0].next_element, "bar")
-
-    def test_insert_tag(self):
-        builder = self.default_builder
-        soup = self.soup(
-            "<a><b>Find</b><c>lady!</c></a>", builder=builder)
-        magic_tag = Tag(soup, builder, 'magictag')
-        magic_tag.insert(0, "the")
-        soup.a.insert(1, magic_tag)
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<a><b>Find</b><magictag>the</magictag><c>lady!</c></a>"))
-
-        # Make sure all the relationships are hooked up correctly.
-        b_tag = soup.b
-        self.assertEqual(b_tag.next_sibling, magic_tag)
-        self.assertEqual(magic_tag.previous_sibling, b_tag)
-
-        find = b_tag.find(text="Find")
-        self.assertEqual(find.next_element, magic_tag)
-        self.assertEqual(magic_tag.previous_element, find)
-
-        c_tag = soup.c
-        self.assertEqual(magic_tag.next_sibling, c_tag)
-        self.assertEqual(c_tag.previous_sibling, magic_tag)
-
-        the = magic_tag.find(text="the")
-        self.assertEqual(the.parent, magic_tag)
-        self.assertEqual(the.next_element, c_tag)
-        self.assertEqual(c_tag.previous_element, the)
-
-    def test_append_child_thats_already_at_the_end(self):
-        data = "<a><b></b></a>"
-        soup = self.soup(data)
-        soup.a.append(soup.b)
-        self.assertEqual(data, soup.decode())
-
-    def test_move_tag_to_beginning_of_parent(self):
-        data = "<a><b></b><c></c><d></d></a>"
-        soup = self.soup(data)
-        soup.a.insert(0, soup.d)
-        self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
-
-    def test_insert_works_on_empty_element_tag(self):
-        # This is a little strange, since most HTML parsers don't allow
-        # markup like this to come through. But in general, we don't
-        # know what the parser would or wouldn't have allowed, so
-        # I'm letting this succeed for now.
-        soup = self.soup("<br/>")
-        soup.br.insert(1, "Contents")
-        self.assertEqual(str(soup.br), "<br>Contents</br>")
-
-    def test_insert_before(self):
-        soup = self.soup("<a>foo</a><b>bar</b>")
-        soup.b.insert_before("BAZ")
-        soup.a.insert_before("QUUX")
-        self.assertEqual(
-            soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
-
-        soup.a.insert_before(soup.b)
-        self.assertEqual(
-            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
-
-    def test_insert_after(self):
-        soup = self.soup("<a>foo</a><b>bar</b>")
-        soup.b.insert_after("BAZ")
-        soup.a.insert_after("QUUX")
-        self.assertEqual(
-            soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
-        soup.b.insert_after(soup.a)
-        self.assertEqual(
-            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
-
-    def test_insert_after_raises_exception_if_after_has_no_meaning(self):
-        soup = self.soup("")
-        tag = soup.new_tag("a")
-        string = soup.new_string("")
-        self.assertRaises(ValueError, string.insert_after, tag)
-        self.assertRaises(NotImplementedError, soup.insert_after, tag)
-        self.assertRaises(ValueError, tag.insert_after, tag)
-
-    def test_insert_before_raises_notimplementederror_if_before_has_no_meaning(self):
-        soup = self.soup("")
-        tag = soup.new_tag("a")
-        string = soup.new_string("")
-        self.assertRaises(ValueError, string.insert_before, tag)
-        self.assertRaises(NotImplementedError, soup.insert_before, tag)
-        self.assertRaises(ValueError, tag.insert_before, tag)
-
-    def test_replace_with(self):
-        soup = self.soup(
-                "<p>There's <b>no</b> business like <b>show</b> business</p>")
-        no, show = soup.find_all('b')
-        show.replace_with(no)
-        self.assertEqual(
-            soup.decode(),
-            self.document_for(
-                "<p>There's  business like <b>no</b> business</p>"))
-
-        self.assertEqual(show.parent, None)
-        self.assertEqual(no.parent, soup.p)
-        self.assertEqual(no.next_element, "no")
-        self.assertEqual(no.next_sibling, " business")
-
-    def test_replace_first_child(self):
-        data = "<a><b></b><c></c></a>"
-        soup = self.soup(data)
-        soup.b.replace_with(soup.c)
-        self.assertEqual("<a><c></c></a>", soup.decode())
-
-    def test_replace_last_child(self):
-        data = "<a><b></b><c></c></a>"
-        soup = self.soup(data)
-        soup.c.replace_with(soup.b)
-        self.assertEqual("<a><b></b></a>", soup.decode())
-
-    def test_nested_tag_replace_with(self):
-        soup = self.soup(
-            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")
-
-        # Replace the entire <b> tag and its contents ("reserve the
-        # right") with the <f> tag ("refuse").
-        remove_tag = soup.b
-        move_tag = soup.f
-        remove_tag.replace_with(move_tag)
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))
-
-        # The <b> tag is now an orphan.
-        self.assertEqual(remove_tag.parent, None)
-        self.assertEqual(remove_tag.find(text="right").next_element, None)
-        self.assertEqual(remove_tag.previous_element, None)
-        self.assertEqual(remove_tag.next_sibling, None)
-        self.assertEqual(remove_tag.previous_sibling, None)
-
-        # The <f> tag is now connected to the <a> tag.
-        self.assertEqual(move_tag.parent, soup.a)
-        self.assertEqual(move_tag.previous_element, "We")
-        self.assertEqual(move_tag.next_element.next_element, soup.e)
-        self.assertEqual(move_tag.next_sibling, None)
-
-        # The gap where the <b> tag used to be has been mended, and
-        # the word "to" is now connected to the <g> tag.
-        to_text = soup.find(text="to")
-        g_tag = soup.g
-        self.assertEqual(to_text.next_element, g_tag)
-        self.assertEqual(to_text.next_sibling, g_tag)
-        self.assertEqual(g_tag.previous_element, to_text)
-        self.assertEqual(g_tag.previous_sibling, to_text)
-
-    def test_unwrap(self):
-        tree = self.soup("""
-            <p>Unneeded <em>formatting</em> is unneeded</p>
-            """)
-        tree.em.unwrap()
-        self.assertEqual(tree.em, None)
-        self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
-
-    def test_wrap(self):
-        soup = self.soup("I wish I was bold.")
-        value = soup.string.wrap(soup.new_tag("b"))
-        self.assertEqual(value.decode(), "<b>I wish I was bold.</b>")
-        self.assertEqual(
-            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
-
-    def test_wrap_extracts_tag_from_elsewhere(self):
-        soup = self.soup("<b></b>I wish I was bold.")
-        soup.b.next_sibling.wrap(soup.b)
-        self.assertEqual(
-            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
-
-    def test_wrap_puts_new_contents_at_the_end(self):
-        soup = self.soup("<b>I like being bold.</b>I wish I was bold.")
-        soup.b.next_sibling.wrap(soup.b)
-        self.assertEqual(2, len(soup.b.contents))
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<b>I like being bold.I wish I was bold.</b>"))
-
-    def test_extract(self):
-        soup = self.soup(
-            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')
-
-        self.assertEqual(len(soup.body.contents), 3)
-        extracted = soup.find(id="nav").extract()
-
-        self.assertEqual(
-            soup.decode(), "<html><body>Some content.  More content.</body></html>")
-        self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>')
-
-        # The extracted tag is now an orphan.
-        self.assertEqual(len(soup.body.contents), 2)
-        self.assertEqual(extracted.parent, None)
-        self.assertEqual(extracted.previous_element, None)
-        self.assertEqual(extracted.next_element.next_element, None)
-
-        # The gap where the extracted tag used to be has been mended.
-        content_1 = soup.find(text="Some content. ")
-        content_2 = soup.find(text=" More content.")
-        self.assertEqual(content_1.next_element, content_2)
-        self.assertEqual(content_1.next_sibling, content_2)
-        self.assertEqual(content_2.previous_element, content_1)
-        self.assertEqual(content_2.previous_sibling, content_1)
-
-    def test_extract_distinguishes_between_identical_strings(self):
-        soup = self.soup("<a>foo</a><b>bar</b>")
        foobar") - foo_1 = soup.a.string - bar_1 = soup.b.string - foo_2 = soup.new_string("foo") - bar_2 = soup.new_string("bar") - soup.a.append(foo_2) - soup.b.append(bar_2) - - # Now there are two identical strings in the tag, and two - # in the tag. Let's remove the first "foo" and the second - # "bar". - foo_1.extract() - bar_2.extract() - self.assertEqual(foo_2, soup.a.string) - self.assertEqual(bar_2, soup.b.string) - - def test_extract_multiples_of_same_tag(self): - soup = self.soup(""" - - - - - - - - - -""") - [soup.script.extract() for i in soup.find_all("script")] - self.assertEqual("\n\n\n", unicode(soup.body)) - - - def test_extract_works_when_element_is_surrounded_by_identical_strings(self): - soup = self.soup( - '\n' - 'hi\n' - '') - soup.find('body').extract() - self.assertEqual(None, soup.find('body')) - - - def test_clear(self): - """Tag.clear()""" - soup = self.soup("

        String Italicized and another

        ") - # clear using extract() - a = soup.a - soup.p.clear() - self.assertEqual(len(soup.p.contents), 0) - self.assertTrue(hasattr(a, "contents")) - - # clear using decompose() - em = a.em - a.clear(decompose=True) - self.assertEqual(0, len(em.contents)) - - def test_string_set(self): - """Tag.string = 'string'""" - soup = self.soup(" ") - soup.a.string = "foo" - self.assertEqual(soup.a.contents, ["foo"]) - soup.b.string = "bar" - self.assertEqual(soup.b.contents, ["bar"]) - - def test_string_set_does_not_affect_original_string(self): - soup = self.soup("foobar") - soup.b.string = soup.c.string - self.assertEqual(soup.a.encode(), b"barbar") - - def test_set_string_preserves_class_of_string(self): - soup = self.soup("") - cdata = CData("foo") - soup.a.string = cdata - self.assertTrue(isinstance(soup.a.string, CData)) - -class TestElementObjects(SoupTest): - """Test various features of element objects.""" - - def test_len(self): - """The length of an element is its number of children.""" - soup = self.soup("123") - - # The BeautifulSoup object itself contains one element: the - # tag. - self.assertEqual(len(soup.contents), 1) - self.assertEqual(len(soup), 1) - - # The tag contains three elements: the text node "1", the - # tag, and the text node "3". - self.assertEqual(len(soup.top), 3) - self.assertEqual(len(soup.top.contents), 3) - - def test_member_access_invokes_find(self): - """Accessing a Python member .foo invokes find('foo')""" - soup = self.soup('') - self.assertEqual(soup.b, soup.find('b')) - self.assertEqual(soup.b.i, soup.find('b').find('i')) - self.assertEqual(soup.a, None) - - def test_deprecated_member_access(self): - soup = self.soup('') - with warnings.catch_warnings(record=True) as w: - tag = soup.bTag - self.assertEqual(soup.b, tag) - self.assertEqual( - '.bTag is deprecated, use .find("b") instead.', - str(w[0].message)) - - def test_has_attr(self): - """has_attr() checks for the presence of an attribute. - - Please note note: has_attr() is different from - __in__. has_attr() checks the tag's attributes and __in__ - checks the tag's chidlren. - """ - soup = self.soup("") - self.assertTrue(soup.foo.has_attr('attr')) - self.assertFalse(soup.foo.has_attr('attr2')) - - - def test_attributes_come_out_in_alphabetical_order(self): - markup = '' - self.assertSoupEquals(markup, '') - - def test_string(self): - # A tag that contains only a text node makes that node - # available as .string. - soup = self.soup("foo") - self.assertEqual(soup.b.string, 'foo') - - def test_empty_tag_has_no_string(self): - # A tag with no children has no .stirng. - soup = self.soup("") - self.assertEqual(soup.b.string, None) - - def test_tag_with_multiple_children_has_no_string(self): - # A tag with no children has no .string. - soup = self.soup("foo") - self.assertEqual(soup.b.string, None) - - soup = self.soup("foobar
        ") - self.assertEqual(soup.b.string, None) - - # Even if all the children are strings, due to trickery, - # it won't work--but this would be a good optimization. - soup = self.soup("foo
        ") - soup.a.insert(1, "bar") - self.assertEqual(soup.a.string, None) - - def test_tag_with_recursive_string_has_string(self): - # A tag with a single child which has a .string inherits that - # .string. - soup = self.soup("foo") - self.assertEqual(soup.a.string, "foo") - self.assertEqual(soup.string, "foo") - - def test_lack_of_string(self): - """Only a tag containing a single text node has a .string.""" - soup = self.soup("feo") - self.assertFalse(soup.b.string) - - soup = self.soup("") - self.assertFalse(soup.b.string) - - def test_all_text(self): - """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated""" - soup = self.soup("ar t ") - self.assertEqual(soup.a.text, "ar t ") - self.assertEqual(soup.a.get_text(strip=True), "art") - self.assertEqual(soup.a.get_text(","), "a,r, , t ") - self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t") - - def test_get_text_ignores_comments(self): - soup = self.soup("foobar") - self.assertEqual(soup.get_text(), "foobar") - - self.assertEqual( - soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar") - self.assertEqual( - soup.get_text(types=None), "fooIGNOREbar") - - def test_all_strings_ignores_comments(self): - soup = self.soup("foobar") - self.assertEqual(['foo', 'bar'], list(soup.strings)) - -class TestCDAtaListAttributes(SoupTest): - - """Testing cdata-list attributes like 'class'. - """ - def test_single_value_becomes_list(self): - soup = self.soup("") - self.assertEqual(["foo"],soup.a['class']) - - def test_multiple_values_becomes_list(self): - soup = self.soup("") - self.assertEqual(["foo", "bar"], soup.a['class']) - - def test_multiple_values_separated_by_weird_whitespace(self): - soup = self.soup("") - self.assertEqual(["foo", "bar", "baz"],soup.a['class']) - - def test_attributes_joined_into_string_on_output(self): - soup = self.soup("") - self.assertEqual(b'', soup.a.encode()) - - def test_accept_charset(self): - soup = self.soup('
        ') - self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset']) - - def test_cdata_attribute_applying_only_to_one_tag(self): - data = '' - soup = self.soup(data) - # We saw in another test that accept-charset is a cdata-list - # attribute for the tag. But it's not a cdata-list - # attribute for any other tag. - self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset']) - - def test_string_has_immutable_name_property(self): - string = self.soup("s").string - self.assertEqual(None, string.name) - def t(): - string.name = 'foo' - self.assertRaises(AttributeError, t) - -class TestPersistence(SoupTest): - "Testing features like pickle and deepcopy." - - def setUp(self): - super(TestPersistence, self).setUp() - self.page = """ - - - -Beautiful Soup: We called him Tortoise because he taught us. - - - - - - -foo -bar - -""" - self.tree = self.soup(self.page) - - def test_pickle_and_unpickle_identity(self): - # Pickling a tree, then unpickling it, yields a tree identical - # to the original. - dumped = pickle.dumps(self.tree, 2) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.__class__, BeautifulSoup) - self.assertEqual(loaded.decode(), self.tree.decode()) - - def test_deepcopy_identity(self): - # Making a deepcopy of a tree yields an identical tree. - copied = copy.deepcopy(self.tree) - self.assertEqual(copied.decode(), self.tree.decode()) - - def test_copy_preserves_encoding(self): - soup = BeautifulSoup(b'

         

        ', 'html.parser') - encoding = soup.original_encoding - copy = soup.__copy__() - self.assertEqual(u"

         

        ", unicode(copy)) - self.assertEqual(encoding, copy.original_encoding) - - def test_unicode_pickle(self): - # A tree containing Unicode characters can be pickled. - html = u"\N{SNOWMAN}" - soup = self.soup(html) - dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.decode(), soup.decode()) - - def test_copy_navigablestring_is_not_attached_to_tree(self): - html = u"FooBar" - soup = self.soup(html) - s1 = soup.find(string="Foo") - s2 = copy.copy(s1) - self.assertEqual(s1, s2) - self.assertEqual(None, s2.parent) - self.assertEqual(None, s2.next_element) - self.assertNotEqual(None, s1.next_sibling) - self.assertEqual(None, s2.next_sibling) - self.assertEqual(None, s2.previous_element) - - def test_copy_navigablestring_subclass_has_same_type(self): - html = u"" - soup = self.soup(html) - s1 = soup.string - s2 = copy.copy(s1) - self.assertEqual(s1, s2) - self.assertTrue(isinstance(s2, Comment)) - - def test_copy_entire_soup(self): - html = u"
        FooBar
        end" - soup = self.soup(html) - soup_copy = copy.copy(soup) - self.assertEqual(soup, soup_copy) - - def test_copy_tag_copies_contents(self): - html = u"
        FooBar
        end" - soup = self.soup(html) - div = soup.div - div_copy = copy.copy(div) - - # The two tags look the same, and evaluate to equal. - self.assertEqual(unicode(div), unicode(div_copy)) - self.assertEqual(div, div_copy) - - # But they're not the same object. - self.assertFalse(div is div_copy) - - # And they don't have the same relation to the parse tree. The - # copy is not associated with a parse tree at all. - self.assertEqual(None, div_copy.parent) - self.assertEqual(None, div_copy.previous_element) - self.assertEqual(None, div_copy.find(string='Bar').next_element) - self.assertNotEqual(None, div.find(string='Bar').next_element) - -class TestSubstitutions(SoupTest): - - def test_default_formatter_is_minimal(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( - u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_html(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html") - self.assertEqual( - decoded, - self.document_for("<<Sacré bleu!>>")) - - def test_formatter_minimal(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( - u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_null(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter=None) - # Neither the angle brackets nor the e-with-acute are converted. - # This is not valid HTML, but it's what the user wanted. - self.assertEqual(decoded, - self.document_for(u"<>")) - - def test_formatter_custom(self): - markup = u"<foo>bar" - soup = self.soup(markup) - decoded = soup.decode(formatter = lambda x: x.upper()) - # Instead of normal entity conversion code, the custom - # callable is called on every string. - self.assertEqual( - decoded, - self.document_for(u"BAR")) - - def test_formatter_is_run_on_attribute_values(self): - markup = u'e' - soup = self.soup(markup) - a = soup.a - - expect_minimal = u'e' - - self.assertEqual(expect_minimal, a.decode()) - self.assertEqual(expect_minimal, a.decode(formatter="minimal")) - - expect_html = u'e' - self.assertEqual(expect_html, a.decode(formatter="html")) - - self.assertEqual(markup, a.decode(formatter=None)) - expect_upper = u'E' - self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) - - def test_formatter_skips_script_tag_for_html_documents(self): - doc = """ - -""" - encoded = BeautifulSoup(doc, 'html.parser').encode() - self.assertTrue(b"< < hey > >" in encoded) - - def test_formatter_skips_style_tag_for_html_documents(self): - doc = """ - -""" - encoded = BeautifulSoup(doc, 'html.parser').encode() - self.assertTrue(b"< < hey > >" in encoded) - - def test_prettify_leaves_preformatted_text_alone(self): - soup = self.soup("
-        # Everything outside the <pre> tag is reformatted, but everything
-        # inside is left alone.
-        self.assertEqual(
-            u'<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n</div>',
-            soup.div.prettify())
-
-    def test_prettify_accepts_formatter(self):
-        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
-        pretty = soup.prettify(formatter=lambda x: x.upper())
-        self.assertTrue("FOO" in pretty)
-
-    def test_prettify_outputs_unicode_by_default(self):
-        soup = self.soup("<a></a>")
-        self.assertEqual(unicode, type(soup.prettify()))
-
-    def test_prettify_can_encode_data(self):
-        soup = self.soup("<a></a>")
-        self.assertEqual(bytes, type(soup.prettify("utf-8")))
-
-    def test_html_entity_substitution_off_by_default(self):
-        markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
-        soup = self.soup(markup)
-        encoded = soup.b.encode("utf-8")
-        self.assertEqual(encoded, markup.encode('utf-8'))
-
-    def test_encoding_substitution(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
-        meta_tag = ('<meta content="text/html; charset=x-sjis" '
-                    'http-equiv="Content-type"/>')
-        soup = self.soup(meta_tag)
-
-        # Parse the document, and the charset appears unchanged.
-        self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')
-
-        # Encode the document into some encoding, and the encoding is
-        # substituted into the meta tag.
-        utf_8 = soup.encode("utf-8")
-        self.assertTrue(b"charset=utf-8" in utf_8)
-
-        euc_jp = soup.encode("euc_jp")
-        self.assertTrue(b"charset=euc_jp" in euc_jp)
-
-        shift_jis = soup.encode("shift-jis")
-        self.assertTrue(b"charset=shift-jis" in shift_jis)
-
-        utf_16_u = soup.encode("utf-16").decode("utf-16")
-        self.assertTrue("charset=utf-16" in utf_16_u)
-
-    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
-        markup = ('<head><meta content="text/html; charset=x-sjis" '
-                  'http-equiv="Content-type"/></head><pre>foo</pre>')
-
-        # Beautiful Soup used to try to rewrite the meta tag even if the
-        # meta tag got filtered out by the strainer. This test makes
-        # sure that doesn't happen.
-        strainer = SoupStrainer('pre')
-        soup = self.soup(markup, parse_only=strainer)
-        self.assertEqual(soup.contents[0].name, 'pre')
-
-class TestEncoding(SoupTest):
-    """Test the ability to encode objects into strings."""
-
-    def test_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(soup.b.string.encode("utf-8"),
-                         u"\N{SNOWMAN}".encode("utf-8"))
-
-    def test_tag_containing_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(
-            soup.b.encode("utf-8"), html.encode("utf-8"))
-
-    def test_encoding_substitutes_unrecognized_characters_by_default(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
-
-    def test_encoding_can_be_made_strict(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertRaises(
-            UnicodeEncodeError, soup.encode, "ascii", errors="strict")
-
-    def test_decode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
-
-    def test_encode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
-                encoding="utf8"))
-
-    def test_deprecated_renderContents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
-
-    def test_repr(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        if PY3K:
-            self.assertEqual(html, repr(soup))
-        else:
-            self.assertEqual(b'<b>\\u2603</b>', repr(soup))
-
-class TestNavigableStringSubclasses(SoupTest):
-
-    def test_cdata(self):
-        # None of the current builders turn CDATA sections into CData
-        # objects, but you can create them manually.
-        soup = self.soup("")
-        cdata = CData("foo")
-        soup.insert(1, cdata)
-        self.assertEqual(str(soup), "<![CDATA[foo]]>")
-        self.assertEqual(soup.find(text="foo"), "foo")
-        self.assertEqual(soup.contents[0], "foo")
-
-    def test_cdata_is_never_formatted(self):
-        """Text inside a CData object is passed into the formatter,
-        but the return value is ignored.
-        """
-        self.count = 0
-        def increment(*args):
-            self.count += 1
-            return "BITTER FAILURE"
-
-        soup = self.soup("")
-        cdata = CData("<><><>")
-        soup.insert(1, cdata)
-        self.assertEqual(
-            b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
-        self.assertEqual(1, self.count)
-
-    def test_doctype_ends_in_newline(self):
-        # Unlike other NavigableString subclasses, a DOCTYPE always ends
-        # in a newline.
-        doctype = Doctype("foo")
-        soup = self.soup("")
-        soup.insert(1, doctype)
-        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
-
-    def test_declaration(self):
-        d = Declaration("foo")
-        self.assertEqual("<?foo?>", d.output_ready())
-
-class TestSoupSelector(TreeTest):
-
-    HTML = """
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-"http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-<title>The title</title>
-<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
-</head>
-<body>
-<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
-<div id="main" class="fancy">
-<div id="inner">
-<h1 id="header1">An H1</h1>
-<p>Some text</p>
-<p class="onep" id="p1">Some more text</p>
-<h2 id="header2">An H2</h2>
-<p class="class1 class2 class3" id="pmulti">Another</p>
-<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
-<h2 id="header3">Another H2</h2>
-<a id="me" href="http://simonwillison.net/" rel="me">me</a>
-<span class="s1">
-<a href="#" id="s1a1">span1a1</a>
-<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
-<span class="span2">
-<a href="#" id="s2a1">span2a1</a>
-</span>
-<span class="span3"></span>
-<custom-dashed-tag class="dashed" id="dash2"/>
-<div data-tag="dashedvalue" id="data1"/>
-</span>
-</div>
-<x id="xid">
-<z id="zida"/>
-<z id="zidab"/>
-<z id="zidac"/>
-</x>
-<y id="yid">
-<z id="zidb"/>
-</y>
-<p lang="en" id="lang-en">English</p>
-<p lang="en-gb" id="lang-en-gb">English UK</p>
-<p lang="en-us" id="lang-en-us">English US</p>
-<p lang="fr" id="lang-fr">French</p>
-<div id="footer"></div>
-</body>
-</html>
-"""
-
-    def setUp(self):
-        self.soup = BeautifulSoup(self.HTML, 'html.parser')
-
-    def assertSelects(self, selector, expected_ids, **kwargs):
-        el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
-        el_ids.sort()
-        expected_ids.sort()
-        self.assertEqual(expected_ids, el_ids,
-            "Selector %s, expected [%s], got [%s]" % (
-                selector, ', '.join(expected_ids), ', '.join(el_ids)
-            )
-        )
-
-    assertSelect = assertSelects
-
-    def assertSelectMultiple(self, *tests):
-        for selector, expected_ids in tests:
-            self.assertSelect(selector, expected_ids)
-
-    def test_one_tag_one(self):
-        els = self.soup.select('title')
-        self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].name, 'title')
-        self.assertEqual(els[0].contents, [u'The title'])
-
-    def test_one_tag_many(self):
-        els = self.soup.select('div')
-        self.assertEqual(len(els), 4)
-        for div in els:
-            self.assertEqual(div.name, 'div')
-
-        el = self.soup.select_one('div')
-        self.assertEqual('main', el['id'])
-
-    def test_select_one_returns_none_if_no_match(self):
-        match = self.soup.select_one('nonexistenttag')
-        self.assertEqual(None, match)
-
-    def test_tag_in_tag_one(self):
-        self.assertSelects('div div', ['inner', 'data1'])
-
-    def test_tag_in_tag_many(self):
-        for selector in ('html div', 'html body div', 'body div'):
-            self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
-
-    def test_limit(self):
-        self.assertSelects('html div', ['main'], limit=1)
-        self.assertSelects('html body div', ['inner', 'main'], limit=2)
-        self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
-                           limit=10)
-
-    def test_tag_no_match(self):
-        self.assertEqual(len(self.soup.select('del')), 0)
-
-    def test_invalid_tag(self):
-        self.assertRaises(ValueError, self.soup.select, 'tag%t')
-
-    def test_select_dashed_tag_ids(self):
-        self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])
-
-    def test_select_dashed_by_id(self):
-        dashed = self.soup.select('custom-dashed-tag[id="dash2"]')
-        self.assertEqual(dashed[0].name, 'custom-dashed-tag')
-        self.assertEqual(dashed[0]['id'], 'dash2')
-
-    def test_dashed_tag_text(self):
-        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
-
-    def test_select_dashed_matches_find_all(self):
-        self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
-
-    def test_header_tags(self):
-        self.assertSelectMultiple(
-            ('h1', ['header1']),
-            ('h2', ['header2', 'header3']),
-        )
-
-    def test_class_one(self):
-        for selector in ('.onep', 'p.onep', 'html p.onep'):
-            els = self.soup.select(selector)
-            self.assertEqual(len(els), 1)
-            self.assertEqual(els[0].name, 'p')
-            self.assertEqual(els[0]['class'], ['onep'])
-
-    def test_class_mismatched_tag(self):
-        els = self.soup.select('div.onep')
-        self.assertEqual(len(els), 0)
-
-    def test_one_id(self):
-        for selector in ('div#inner', '#inner', 'div div#inner'):
-            self.assertSelects(selector, ['inner'])
-
-    def test_bad_id(self):
-        els = self.soup.select('#doesnotexist')
-        self.assertEqual(len(els), 0)
-
-    def test_items_in_id(self):
-        els = self.soup.select('div#inner p')
-        self.assertEqual(len(els), 3)
-        for el in els:
-            self.assertEqual(el.name, 'p')
-        self.assertEqual(els[1]['class'], ['onep'])
-        self.assertFalse(els[0].has_attr('class'))
-
-    def test_a_bunch_of_emptys(self):
-        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
-            self.assertEqual(len(self.soup.select(selector)), 0)
-
-    def test_multi_class_support(self):
-        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
-                         '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
-            self.assertSelects(selector, ['pmulti'])
-
-    def test_multi_class_selection(self):
-        for selector in ('.class1.class3', '.class3.class2',
-                         '.class1.class2.class3'):
-            self.assertSelects(selector, ['pmulti'])
-
-    def test_child_selector(self):
-        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
-        self.assertSelects('.s1 > a span', ['s1a2s1'])
-
-    def test_child_selector_id(self):
-        self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1'])
-
-    def test_attribute_equals(self):
-        self.assertSelectMultiple(
-            ('p[class="onep"]', ['p1']),
-            ('p[id="p1"]', ['p1']),
-            ('[class="onep"]', ['p1']),
-            ('[id="p1"]', ['p1']),
-            ('link[rel="stylesheet"]', ['l1']),
-            ('link[type="text/css"]', ['l1']),
-            ('link[href="blah.css"]', ['l1']),
-            ('link[href="no-blah.css"]', []),
-            ('[rel="stylesheet"]', ['l1']),
-            ('[type="text/css"]', ['l1']),
-            ('[href="blah.css"]', ['l1']),
-            ('[href="no-blah.css"]', []),
-            ('p[href="no-blah.css"]', []),
-            ('[href="no-blah.css"]', []),
-        )
-
-    def test_attribute_tilde(self):
-        self.assertSelectMultiple(
-            ('p[class~="class1"]', ['pmulti']),
-            ('p[class~="class2"]', ['pmulti']),
-            ('p[class~="class3"]', ['pmulti']),
-            ('[class~="class1"]', ['pmulti']),
-            ('[class~="class2"]', ['pmulti']),
-            ('[class~="class3"]', ['pmulti']),
-            ('a[rel~="friend"]', ['bob']),
-            ('a[rel~="met"]', ['bob']),
-            ('[rel~="friend"]', ['bob']),
-            ('[rel~="met"]', ['bob']),
-        )
-
-    def test_attribute_startswith(self):
-        self.assertSelectMultiple(
-            ('[rel^="style"]', ['l1']),
-            ('link[rel^="style"]', ['l1']),
-            ('notlink[rel^="notstyle"]', []),
-            ('[rel^="notstyle"]', []),
-            ('link[rel^="notstyle"]', []),
-            ('link[href^="bla"]', ['l1']),
-            ('a[href^="http://"]', ['bob', 'me']),
-            ('[href^="http://"]', ['bob', 'me']),
-            ('[id^="p"]', ['pmulti', 'p1']),
-            ('[id^="m"]', ['me', 'main']),
-            ('div[id^="m"]', ['main']),
-            ('a[id^="m"]', ['me']),
-            ('div[data-tag^="dashed"]', ['data1'])
-        )
-
-    def test_attribute_endswith(self):
-        self.assertSelectMultiple(
-            ('[href$=".css"]', ['l1']),
-            ('link[href$=".css"]', ['l1']),
-            ('link[id$="1"]', ['l1']),
-            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
-            ('div[id$="1"]', ['data1']),
-            ('[id$="noending"]', []),
-        )
-
-    def test_attribute_contains(self):
-        self.assertSelectMultiple(
-            # From test_attribute_startswith
-            ('[rel*="style"]', ['l1']),
-            ('link[rel*="style"]', ['l1']),
-            ('notlink[rel*="notstyle"]', []),
-            ('[rel*="notstyle"]', []),
-            ('link[rel*="notstyle"]', []),
-            ('link[href*="bla"]', ['l1']),
-            ('[href*="http://"]', ['bob', 'me']),
-            ('[id*="p"]', ['pmulti', 'p1']),
-            ('div[id*="m"]', ['main']),
-            ('a[id*="m"]', ['me']),
-            # From test_attribute_endswith
-            ('[href*=".css"]', ['l1']),
-            ('link[href*=".css"]', ['l1']),
-            ('link[id*="1"]', ['l1']),
-            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
-            ('div[id*="1"]', ['data1']),
-            ('[id*="noending"]', []),
-            # New for this test
-            ('[href*="."]', ['bob', 'me', 'l1']),
-            ('a[href*="."]', ['bob', 'me']),
-            ('link[href*="."]', ['l1']),
-            ('div[id*="n"]', ['main', 'inner']),
-            ('div[id*="nn"]', ['inner']),
-            ('div[data-tag*="edval"]', ['data1'])
-        )
-
-    def test_attribute_exact_or_hyphen(self):
-        self.assertSelectMultiple(
-            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
-            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
-            ('p[lang|="fr"]', ['lang-fr']),
-            ('p[lang|="gb"]', []),
-        )
-
-    def test_attribute_exists(self):
-        self.assertSelectMultiple(
-            ('[rel]', ['l1', 'bob', 'me']),
-            ('link[rel]', ['l1']),
-            ('a[rel]', ['bob', 'me']),
-            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
-            ('p[class]', ['p1', 'pmulti']),
-            ('[blah]', []),
-            ('p[blah]', []),
-            ('div[data-tag]', ['data1'])
-        )
-
-    def test_quoted_space_in_selector_name(self):
-        html = """<div style="display: wrong">nope</div>
-        <div style="display: right">yes</div>
-        """
-        soup = BeautifulSoup(html, 'html.parser')
-        [chosen] = soup.select('div[style="display: right"]')
-        self.assertEqual("yes", chosen.string)
-
-    def test_unsupported_pseudoclass(self):
-        self.assertRaises(
-            NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
-
-        self.assertRaises(
-            NotImplementedError, self.soup.select, "a:nth-of-type(a)")
-
-    def test_nth_of_type(self):
-        # Try to select first paragraph
-        els = self.soup.select('div#inner p:nth-of-type(1)')
-        self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Some text')
-
-        # Try to select third paragraph
-        els = self.soup.select('div#inner p:nth-of-type(3)')
-        self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Another')
-
-        # Try to select (non-existent!) fourth paragraph
-        els = self.soup.select('div#inner p:nth-of-type(4)')
-        self.assertEqual(len(els), 0)
-
-        # Pass in an invalid value.
-        self.assertRaises(
-            ValueError, self.soup.select, 'div p:nth-of-type(0)')
-
-    def test_nth_of_type_direct_descendant(self):
-        els = self.soup.select('div#inner > p:nth-of-type(1)')
-        self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Some text')
-
-    def test_id_child_selector_nth_of_type(self):
-        self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
-
-    def test_select_on_element(self):
-        # Other tests operate on the tree; this operates on an element
-        # within the tree.
-        inner = self.soup.find("div", id="main")
-        selected = inner.select("div")
-        # The <div id="inner"> tag was selected. The <div id="main">