From 89ed43dc8453254c97546549c31ce591d63e8533 Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Thu, 10 Aug 2017 15:36:18 +0200 Subject: [PATCH 1/7] initial addition of EBI InterProScan files for custom types example --- _includes/cwl/InterProScan-apps.yml | 18 ++++++++ _includes/cwl/custom-types.cwl | 71 +++++++++++++++++++++++++++++ _includes/cwl/custom-types.yml | 1 + 3 files changed, 90 insertions(+) create mode 100644 _includes/cwl/InterProScan-apps.yml create mode 100644 _includes/cwl/custom-types.cwl create mode 100644 _includes/cwl/custom-types.yml diff --git a/_includes/cwl/InterProScan-apps.yml b/_includes/cwl/InterProScan-apps.yml new file mode 100644 index 00000000..e42779c3 --- /dev/null +++ b/_includes/cwl/InterProScan-apps.yml @@ -0,0 +1,18 @@ +type: enum +name: apps +symbols: + - TIGRFAM + - SFLD + - SUPERFAMILY + - Gene3D + - Hamap + - Coils + - ProSiteProfiles + - SMART + - CDD + - PRINTS + - PIRSF + - ProSitePatterns + - Pfam + - ProDom + - MobiDBLite diff --git a/_includes/cwl/custom-types.cwl b/_includes/cwl/custom-types.cwl new file mode 100644 index 00000000..26ca2673 --- /dev/null +++ b/_includes/cwl/custom-types.cwl @@ -0,0 +1,71 @@ +cwlVersion: v1.0 +class: CommandLineTool + +label: "InterProScan: protein sequence classifier" + +doc: | + Version 5.21-60 can be downloaded here: + https://github.com/ebi-pf-team/interproscan/wiki/HowToDownload + + Documentation on how to run InterProScan 5 can be found here: + https://github.com/ebi-pf-team/interproscan/wiki/HowToRun + +requirements: + ResourceRequirement: + ramMin: 10240 + coresMin: 3 + SchemaDefRequirement: + types: + - $import: InterProScan-apps.yaml + - $import: InterProScan-protein_formats.yaml +hints: + SoftwareRequirement: + packages: + interproscan: + specs: [ "https://identifiers.org/rrid/RRID:SCR_005829" ] + version: [ "5.21-60" ] + +inputs: + proteinFile: + type: File + inputBinding: + prefix: --input + # outputFileType: + # type: 
InterProScan-protein_formats.yaml#protein_formats + # inputBinding: + # prefix: --formats + applications: + type: InterProScan-apps.yaml#apps[]? + inputBinding: + itemSeparator: ',' + prefix: --applications + +baseCommand: interproscan.sh + +arguments: + - valueFrom: $(inputs.proteinFile.nameroot).i5_annotations + prefix: --outfile + - valueFrom: TSV + prefix: --formats + - --disable-precalc + - --goterms + - --pathways + - valueFrom: $(runtime.tmpdir) + prefix: --tempdir + + +outputs: + i5Annotations: + type: File + format: iana:text/tab-separated-values + outputBinding: + glob: $(inputs.proteinFile.nameroot).i5_annotations + +$namespaces: + iana: https://www.iana.org/assignments/media-types/ + s: http://schema.org/ +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + +s:license: "https://www.apache.org/licenses/LICENSE-2.0" +s:copyrightHolder: "EMBL - European Bioinformatics Institute" diff --git a/_includes/cwl/custom-types.yml b/_includes/cwl/custom-types.yml new file mode 100644 index 00000000..2c53f291 --- /dev/null +++ b/_includes/cwl/custom-types.yml @@ -0,0 +1 @@ +proteinFile: test_proteins.fasta From 90d3d54cd086471a84b53dd7e9e813e433b16ec4 Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Thu, 10 Aug 2017 16:10:39 +0200 Subject: [PATCH 2/7] format fixes, started simplifying CWL description --- _includes/cwl/custom-types.cwl | 10 +++------- _includes/cwl/custom-types.yml | 4 +++- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/_includes/cwl/custom-types.cwl b/_includes/cwl/custom-types.cwl index 26ca2673..75d1b294 100644 --- a/_includes/cwl/custom-types.cwl +++ b/_includes/cwl/custom-types.cwl @@ -16,8 +16,8 @@ requirements: coresMin: 3 SchemaDefRequirement: types: - - $import: InterProScan-apps.yaml - - $import: InterProScan-protein_formats.yaml + - $import: InterProScan-apps.yml + hints: SoftwareRequirement: packages: @@ -30,12 +30,8 @@ inputs: type: File inputBinding: prefix: --input - # outputFileType: - # type: 
InterProScan-protein_formats.yaml#protein_formats - # inputBinding: - # prefix: --formats applications: - type: InterProScan-apps.yaml#apps[]? + type: InterProScan-apps.yml#apps[]? inputBinding: itemSeparator: ',' prefix: --applications diff --git a/_includes/cwl/custom-types.yml b/_includes/cwl/custom-types.yml index 2c53f291..ec8fb9e6 100644 --- a/_includes/cwl/custom-types.yml +++ b/_includes/cwl/custom-types.yml @@ -1 +1,3 @@ -proteinFile: test_proteins.fasta +proteinFile: + class: File + path: test_proteins.fasta From e29dd9db6e964ccb6bfa1ed6bcea25339bc94352 Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Fri, 11 Aug 2017 14:32:30 +0200 Subject: [PATCH 3/7] added test proteins file from interproscan --- _includes/cwl/test_proteins.fasta | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 _includes/cwl/test_proteins.fasta diff --git a/_includes/cwl/test_proteins.fasta b/_includes/cwl/test_proteins.fasta new file mode 100644 index 00000000..ec34ed42 --- /dev/null +++ b/_includes/cwl/test_proteins.fasta @@ -0,0 +1,34 @@ +>Q97R95 +MKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALG +FKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQAL +SVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNS +DPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICSSLKSDS +MIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWVDKGAAEALSQYGKSLLLSGI +VEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDMLRSQKAKGVLIYRDDWISITP +EIQLLFTEF +>A2VDN9 +MEVKGKKKLTGKGTKMSQEKSKFHKNNDSGSSKTFPKKVVKEGGPKITSKNFEKTATKPGKKGVKQFKNKQQGDRIPKNK +FQQANKFNQKRKFQPDSKSDESAAKKPKWDEFKKKKKELKQSRQLSDKTNYDIVIRAKQIWEILRRKDCDKEKRVKLMSD +LQKLIQGKIKTIAFAHDSTRVIQCYIQFGNEEQRKQAFEELRGDLVELSKAKYSRNIVKKFLMYGSKAQIAEIIRSFKGH +VRKLLRHAEASAIVEYAYNDKAILEQRNMLTEELYGNTFQLYKSADHPTLDKVLEVQPEKLELIMDEMKQILTPMAQKEA +VIKHSLVHKVFLDFFTYAPPKLRSEMIEAIREAVVYLAHTHDGARVAMYCLWHGTPKDRKVIVKTMKTYIEKVANGQYSH +LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEVLQKGDGNAHSKKDTEIRRRE 
+LLESISPALLSYLQGHAQEVVLDKSACVLVADILGTATGDVQPAMDAVASLAAAELHPGGKDGELHIAEHPAGHLVLKWL +IEQDKKMKERGREGCFAKTLIERVGVKNLKSWASVNRGAIILSSLLQSSDQEVANKVKAGLKSLIPALEKSKNTSKGIEM +LLEKLTA +>A2YIW7 +MAAEEGVVIACHNKDEFDAQMTKAKEAGKVVIIDFTASWCGPCRFIAPVFAEYAKKFPGAVFLKVDVDELKEVAEKYNVE +AMPTFLFIKDGAEADKVVGARKDDLQNTIVKHVGATAASASA +>P22298 +GRGLLPFVLLALGIXAPWAVEGAENALKGGACPPRKIVQCLRYEKPKCTSDWQCPDKKKC +CRDTCAIKCLNPVAITNPVKVKPGKCPVVYGQCMMLNPPNHCKTDSQCLGDLKCCKSMCG +KVCLTPVKA +>A0B6J9 +MSKIGKSIRLERIIDRKTRKTVIVPMDHGLTVGPIPGLIDLAAAVDKVAEGGANAVLGHM +GLPLYGHRGYGKDVGLIIHLSASTSLGPDANHKVLVTRVEDAIRVGADGVSIHVNVGAED +EAEMLRDLGMVARRCDLWGMPLLAMMYPRGAKVRSEHSVEYVKHAARVGAELGVDIVKTN +YTGSPETFREVVRGCPAPVVIAGGPKMDTEADLLQMVYDAMQAGAAGISIGRNIFQAENP +TLLTRKLSKIVHEGYTPEEAARLKL +>P02939 +MNRTKLVLGAVILGSTLLAGCSSNAKIDQLSTDVQTLNAKVDQLSNDVTAIRSDVQAAKD +DAARANQRLDNQAHSYRK \ No newline at end of file From 1a3f094373127504eb0a5aad139642f189614fd1 Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Fri, 11 Aug 2017 15:41:39 +0200 Subject: [PATCH 4/7] first draft of custom types episode --- _episodes/18-custom-types.md | 88 ++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 _episodes/18-custom-types.md diff --git a/_episodes/18-custom-types.md b/_episodes/18-custom-types.md new file mode 100644 index 00000000..cf388be8 --- /dev/null +++ b/_episodes/18-custom-types.md @@ -0,0 +1,88 @@ +--- +title: "Custom Types" +teaching: 10 +exercises: 0 +questions: +- "How do I create and import my own custom types into a CWL description?" +objectives: +- "Learn how to write custom CWL object types." +- "Learn how to import these custom objects into a tool description." +keypoints: +- "You can create your own custom types to load into descriptions." +- "These custom types allow the user to configure the behaviour of a tool + without tinkering directly with the tool description." +- "Custom types are described in separate YAML files and imported as needed." 
+--- + +Sometimes you may want to write your own custom types for use and reuse in CWL +descriptions. Use of such custom types can reduce redundancy between multiple +descriptions that all use the same type, and also allow for additional +customisation/configuration of a tool/analysis without the need to fiddle with +the CWL description directly. + +The example below is a CWL description of the [InterProScan][ips] tool for +simultaneously searching protein sequences against a wide variety of resources. +It is a good example of a number of good practices in CWL. + +*custom-types.cwl* + +~~~ +{% include cwl/custom-types.cwl %} +~~~ +{: .source} + +*custom-types.yml* + +~~~ +{% include cwl/tar-param-job.yml %} +~~~ +{: .source} + +On line 34, in `inputs:applications`, a list of applications to be used in the +search is imported as a custom object: + +``` +inputs: + proteinFile: + type: File + inputBinding: + prefix: --input + applications: + type: InterProScan-apps.yml#apps[]? + inputBinding: + itemSeparator: ',' + prefix: --applications +``` +{: .source} + +The reference to a custom type is a combination of the name of the file in which +the object is defined (`InterProScan-apps.yml`) and the name of the object +within that file (`apps`) that defines the custom type. The square brackets +`[]` define the value of the imported type as an array. + +The contents of the YAML file describing the custom type are given below: + +{% include cwl/InterProScan-apps.yml %} + +In order for the custom type to be used in the CWL description, it must be +imported. 
Imports are described in `requirements:SchemaDefRequirement`, as +below in the example `custom-types.cwl` description: + +``` +requirements: + ResourceRequirement: + ramMin: 10240 + coresMin: 3 + SchemaDefRequirement: + types: + - $import: InterProScan-apps.yml +``` +{: .source} + +Note also that the author of this CWL description has included +`ResourceRequirement`s, specifying the minimum amount of RAM and number of cores +required for the tool to run successfully, as well as details of the version of +the software that the description was written for and other useful metadata. +These features are discussed further in other chapters of this user guide. + +[ips]: https://github.com/ebi-pf-team/interproscan From d5841774c56a8f5bb8a1ee729a8763f8ae82126a Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Fri, 11 Aug 2017 16:13:16 +0200 Subject: [PATCH 5/7] fixed formatting mistakes --- _episodes/18-custom-types.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/_episodes/18-custom-types.md b/_episodes/18-custom-types.md index cf388be8..30c38ce4 100644 --- a/_episodes/18-custom-types.md +++ b/_episodes/18-custom-types.md @@ -34,7 +34,7 @@ It is a good example of a number of good practices in CWL. *custom-types.yml* ~~~ -{% include cwl/tar-param-job.yml %} +{% include cwl/custom-types.yml %} ~~~ {: .source} @@ -62,7 +62,10 @@ within that file (`apps`) that defines the custom type. The square brackets The contents of the YAML file describing the custom type are given below: +~~~ {% include cwl/InterProScan-apps.yml %} +~~~ +{: .source} In order for the custom type to be used in the CWL description, it must be imported. 
Imports are described in `requirements:SchemaDefRequirement`, as From 02b637bb74eff41a141d456db3e849a4d3e9f0dc Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Fri, 11 Aug 2017 16:17:23 +0200 Subject: [PATCH 6/7] corrected lesson number in filename --- _episodes/{18-custom-types.md => 19-custom-types.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename _episodes/{18-custom-types.md => 19-custom-types.md} (100%) diff --git a/_episodes/18-custom-types.md b/_episodes/19-custom-types.md similarity index 100% rename from _episodes/18-custom-types.md rename to _episodes/19-custom-types.md From 3543b25ce25f8729bb9cdc68f910b3080fb497cc Mon Sep 17 00:00:00 2001 From: Toby Hodges Date: Fri, 8 Sep 2017 10:15:39 +0100 Subject: [PATCH 7/7] corrected explanation of [] in imported type description --- _episodes/19-custom-types.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/_episodes/19-custom-types.md b/_episodes/19-custom-types.md index 30c38ce4..39fe3757 100644 --- a/_episodes/19-custom-types.md +++ b/_episodes/19-custom-types.md @@ -57,8 +57,9 @@ inputs: The reference to a custom type is a combination of the name of the file in which the object is defined (`InterProScan-apps.yml`) and the name of the object -within that file (`apps`) that defines the custom type. The square brackets -`[]` define the value of the imported type as an array. +within that file (`apps`) that defines the custom type. The square brackets `[]` +mean that an array of the preceding type is expected, in this case the `apps` +type from the imported `InterProScan-apps.yml` file. The contents of the YAML file describing the custom type are given below: