diff --git a/_episodes/19-custom-types.md b/_episodes/19-custom-types.md new file mode 100644 index 00000000..39fe3757 --- /dev/null +++ b/_episodes/19-custom-types.md @@ -0,0 +1,92 @@ +--- +title: "Custom Types" +teaching: 10 +exercises: 0 +questions: +- "How do I create and import my own custom types into a CWL description?" +objectives: +- "Learn how to write custom CWL object types." +- "Learn how to import these custom objects into a tool description." +keypoints: +- "You can create your own custom types to load into descriptions." +- "These custom types allow the user to configure the behaviour of a tool + without tinkering directly with the tool description." +- "Custom types are described in separate YAML files and imported as needed." +--- + +Sometimes you may want to write your own custom types for use and reuse in CWL +descriptions. Use of such custom types can reduce redundancy between multiple +descriptions that all use the same type, and also allow for additional +customisation/configuration of a tool/analysis without the need to fiddle with +the CWL description directly. + +The example below is a CWL description of the [InterProScan][ips] tool for +simultaneously searching protein sequences against a wide variety of resources. +It is a good example of a number of good practices in CWL. + +*custom-types.cwl* + +~~~ +{% include cwl/custom-types.cwl %} +~~~ +{: .source} + +*custom-types.yml* + +~~~ +{% include cwl/custom-types.yml %} +~~~ +{: .source} + +On line 34, in `inputs:applications`, a list of applications to be used in the +search is imported as a custom object: + +``` +inputs: + proteinFile: + type: File + inputBinding: + prefix: --input + applications: + type: InterProScan-apps.yml#apps[]? 
+ inputBinding: + itemSeparator: ',' + prefix: --applications +``` +{: .source} + +The reference to a custom type is a combination of the name of the file in which +the object is defined (`InterProScan-apps.yml`) and the name of the object +within that file (`apps`) that defines the custom type. The square brackets `[]` +mean that an array of the preceding type is expected, in this case the `apps` +type from the imported `InterProScan-apps.yml` file. + +The contents of the YAML file describing the custom type are given below: + +~~~ +{% include cwl/InterProScan-apps.yml %} +~~~ +{: .source} + +In order for the custom type to be used in the CWL description, it must be +imported. Imports are described in `requirements:SchemaDefRequirement`, as +below in the example `custom-types.cwl` description: + +``` +requirements: + ResourceRequirement: + ramMin: 10240 + coresMin: 3 + SchemaDefRequirement: + types: + - $import: InterProScan-apps.yml +``` +{: .source} + +Note that the author of this CWL description has also included +`ResourceRequirement`s, specifying the minimum amount of RAM and number of cores +required for the tool to run successfully, as well as details of the version of +the software that the description was written for and other useful metadata. +These features are discussed further in other chapters of this user guide. 
+ +[ips]: https://github.com/ebi-pf-team/interproscan diff --git a/_includes/cwl/InterProScan-apps.yml b/_includes/cwl/InterProScan-apps.yml new file mode 100644 index 00000000..e42779c3 --- /dev/null +++ b/_includes/cwl/InterProScan-apps.yml @@ -0,0 +1,18 @@ +type: enum +name: apps +symbols: + - TIGRFAM + - SFLD + - SUPERFAMILY + - Gene3D + - Hamap + - Coils + - ProSiteProfiles + - SMART + - CDD + - PRINTS + - PIRSF + - ProSitePatterns + - Pfam + - ProDom + - MobiDBLite diff --git a/_includes/cwl/custom-types.cwl b/_includes/cwl/custom-types.cwl new file mode 100644 index 00000000..75d1b294 --- /dev/null +++ b/_includes/cwl/custom-types.cwl @@ -0,0 +1,67 @@ +cwlVersion: v1.0 +class: CommandLineTool + +label: "InterProScan: protein sequence classifier" + +doc: | + Version 5.21-60 can be downloaded here: + https://github.com/ebi-pf-team/interproscan/wiki/HowToDownload + + Documentation on how to run InterProScan 5 can be found here: + https://github.com/ebi-pf-team/interproscan/wiki/HowToRun + +requirements: + ResourceRequirement: + ramMin: 10240 + coresMin: 3 + SchemaDefRequirement: + types: + - $import: InterProScan-apps.yml + +hints: + SoftwareRequirement: + packages: + interproscan: + specs: [ "https://identifiers.org/rrid/RRID:SCR_005829" ] + version: [ "5.21-60" ] + +inputs: + proteinFile: + type: File + inputBinding: + prefix: --input + applications: + type: InterProScan-apps.yml#apps[]? 
+ inputBinding: + itemSeparator: ',' + prefix: --applications + +baseCommand: interproscan.sh + +arguments: + - valueFrom: $(inputs.proteinFile.nameroot).i5_annotations + prefix: --outfile + - valueFrom: TSV + prefix: --formats + - --disable-precalc + - --goterms + - --pathways + - valueFrom: $(runtime.tmpdir) + prefix: --tempdir + + +outputs: + i5Annotations: + type: File + format: iana:text/tab-separated-values + outputBinding: + glob: $(inputs.proteinFile.nameroot).i5_annotations + +$namespaces: + iana: https://www.iana.org/assignments/media-types/ + s: http://schema.org/ +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + +s:license: "https://www.apache.org/licenses/LICENSE-2.0" +s:copyrightHolder: "EMBL - European Bioinformatics Institute" diff --git a/_includes/cwl/custom-types.yml b/_includes/cwl/custom-types.yml new file mode 100644 index 00000000..ec8fb9e6 --- /dev/null +++ b/_includes/cwl/custom-types.yml @@ -0,0 +1,3 @@ +proteinFile: + class: File + path: test_proteins.fasta diff --git a/_includes/cwl/test_proteins.fasta b/_includes/cwl/test_proteins.fasta new file mode 100644 index 00000000..ec34ed42 --- /dev/null +++ b/_includes/cwl/test_proteins.fasta @@ -0,0 +1,34 @@ +>Q97R95 +MKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALG +FKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQAL +SVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNS +DPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICSSLKSDS +MIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWVDKGAAEALSQYGKSLLLSGI +VEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDMLRSQKAKGVLIYRDDWISITP +EIQLLFTEF +>A2VDN9 +MEVKGKKKLTGKGTKMSQEKSKFHKNNDSGSSKTFPKKVVKEGGPKITSKNFEKTATKPGKKGVKQFKNKQQGDRIPKNK +FQQANKFNQKRKFQPDSKSDESAAKKPKWDEFKKKKKELKQSRQLSDKTNYDIVIRAKQIWEILRRKDCDKEKRVKLMSD +LQKLIQGKIKTIAFAHDSTRVIQCYIQFGNEEQRKQAFEELRGDLVELSKAKYSRNIVKKFLMYGSKAQIAEIIRSFKGH +VRKLLRHAEASAIVEYAYNDKAILEQRNMLTEELYGNTFQLYKSADHPTLDKVLEVQPEKLELIMDEMKQILTPMAQKEA 
+VIKHSLVHKVFLDFFTYAPPKLRSEMIEAIREAVVYLAHTHDGARVAMYCLWHGTPKDRKVIVKTMKTYIEKVANGQYSH +LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEVLQKGDGNAHSKKDTEIRRRE +LLESISPALLSYLQGHAQEVVLDKSACVLVADILGTATGDVQPAMDAVASLAAAELHPGGKDGELHIAEHPAGHLVLKWL +IEQDKKMKERGREGCFAKTLIERVGVKNLKSWASVNRGAIILSSLLQSSDQEVANKVKAGLKSLIPALEKSKNTSKGIEM +LLEKLTA +>A2YIW7 +MAAEEGVVIACHNKDEFDAQMTKAKEAGKVVIIDFTASWCGPCRFIAPVFAEYAKKFPGAVFLKVDVDELKEVAEKYNVE +AMPTFLFIKDGAEADKVVGARKDDLQNTIVKHVGATAASASA +>P22298 +GRGLLPFVLLALGIXAPWAVEGAENALKGGACPPRKIVQCLRYEKPKCTSDWQCPDKKKC +CRDTCAIKCLNPVAITNPVKVKPGKCPVVYGQCMMLNPPNHCKTDSQCLGDLKCCKSMCG +KVCLTPVKA +>A0B6J9 +MSKIGKSIRLERIIDRKTRKTVIVPMDHGLTVGPIPGLIDLAAAVDKVAEGGANAVLGHM +GLPLYGHRGYGKDVGLIIHLSASTSLGPDANHKVLVTRVEDAIRVGADGVSIHVNVGAED +EAEMLRDLGMVARRCDLWGMPLLAMMYPRGAKVRSEHSVEYVKHAARVGAELGVDIVKTN +YTGSPETFREVVRGCPAPVVIAGGPKMDTEADLLQMVYDAMQAGAAGISIGRNIFQAENP +TLLTRKLSKIVHEGYTPEEAARLKL +>P02939 +MNRTKLVLGAVILGSTLLAGCSSNAKIDQLSTDVQTLNAKVDQLSNDVTAIRSDVQAAKD +DAARANQRLDNQAHSYRK \ No newline at end of file