30 January 2015

Filtering Fasta Sequences using the #NCBI C++ API. My notebook.

In my previous post (http://plindenbaum.blogspot.com/2015/01/compiling-c-hello-world-program-using.html) I've built a simple "Hello World" application using the
NCBI C++ toolbox (http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/). Here, I'm gonna to extend the code in order to create a program filtering FASTA sequences on their sizes.

This new application FastaFilterSize needs three new arguments:

  • '-m' min size of the fasta sequence
  • '-M' max size of the fasta sequence
  • '-v' inverse selection

the method FastaFilterSize::Init() is used to register those optional arguments

/* optional argument for min size */
arg_desc->AddOptionalKey(
    "m",
    "min_size",
    "min size inclusive",
    CArgDescriptions::eInteger
    );
/* optional argument for max size */
arg_desc->AddOptionalKey(
    "M",
    "max_size",
    "max size inclusive",
    CArgDescriptions::eInteger
    );
/* add simple flag to inverse selection */
arg_desc->AddFlag("v", 
    "inverse selection",
    true
    );

the FastaFilterSize::Run() method is the workhorse of the program. Here, we get the output stream that will be used by a CFastaOstream to print the filtered Fasta sequences:

(...)
/* get output stream */
CNcbiOstream& out =  this->GetArgs()["o"].AsOutputFile();
/* create a FASTA writer */
CFastaOstream* writer = new CFastaOstream(out);
(...)

We get the input stream that will be used by a CFastaReader to read the FASTA sequences to be filtered.

/* get input stream */
CNcbiIstream& input_stream  = this->GetArgs()["i"].AsInputFile();
CFastaReader reader(input_stream ,
        CFastaReader::fNoParseID /* http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/fasta_8hpp_source.html */
        );

We get the optional values for min-length and max-length:

if (this->GetArgs()["m"])
    {
    min_inclusive = this->GetArgs()["m"].AsInteger();
    }
if (this->GetArgs()["M"])
    {
    max_inclusive = this->GetArgs()["M"].AsInteger();
    }

We get the 'inverse' flag.

bool invert=this->GetArgs()["v"];

While a Fasta sequence can be read, we check its size and we print it if needed:

while( !reader.AtEOF() )
    {
    /* read next sequence */
    CRef<CSeq_entry> seq(reader.ReadOneSeq());
    /* should we print the sequence ?*/
    if((seq->GetSeq().GetLength()>= min_inclusive && 
        seq->GetSeq().GetLength()<= max_inclusive
        )!= invert)
        {
        /* yes, print */
        writer->Write(*seq);
        }
    }

All in one:

#include <memory>
#include <limits>
#include <objtools/readers/fasta.hpp>
#include "corelib/ncbiapp.hpp"
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Delta_ext.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/NCBIeaa.hpp>
#include <objects/seq/IUPACaa.hpp>
#include <objects/seq/IUPACna.hpp>
#include <objects/seq/Seg_ext.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Seq_hist.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_literal.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seq/seqport_util.hpp>
#include <objects/seqalign/Dense_seg.hpp>
#include <objects/seqalign/Seq_align.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_loc_mix.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objtools/readers/message_listener.hpp>
#include <objtools/readers/line_error.hpp>
#include <objtools/seqmasks_io/mask_reader.hpp>
#include <objtools/seqmasks_io/mask_writer.hpp>
#include <objtools/seqmasks_io/mask_fasta_reader.hpp>
#include <objmgr/util/sequence.hpp>
#include <ctype.h>

USING_NCBI_SCOPE;
USING_SCOPE(objects);

/**
 * FastaFilterSize
 *    
 *  Motivation:
 *      filter sequences on size
 *  Author:
 *      Pierre Lindenbaum PhD 2015
 *
 */
class FastaFilterSize : public CNcbiApplication /* extends a basic NCBI application defined in c++/include/corelib/ncbiapp.hpp */
    {
    public:

        /* constructor, just set the version to '1.0.0'*/
        FastaFilterSize()
            {
            CRef<CVersion> version(new CVersion());
            version->SetVersionInfo(1, 0, 0);
            this->SetFullVersion(version);
            }
            
            
        /* called to initialize rge program.
        *  The default behavior of this in 'CNcbiApplication' is "do nothing".
        *  Here, we set the command line arguments.
        */
        virtual void Init()
            {
            CArgDescriptions* arg_desc = new CArgDescriptions ; /* defined in /c++/include/corelib/ncbiargs.hpp */
            arg_desc->SetUsageContext(
                GetArguments().GetProgramBasename(),
                "A program filtering fasta sequences on size"
                );
            /* argument for input file */
            arg_desc->AddDefaultKey(
                    "i",/* name */
                    "input_file_name",/* synopsis */
                    "input file name",/* comment */
                    CArgDescriptions::eInputFile, /* argument type */
                    "-" /* default value*/
                    );
           /* argument for output file */
           arg_desc->AddDefaultKey(
                    "o",/* name */
                    "output_file_name",/* synopsis */
                    "output file name",/* comment */
                    CArgDescriptions::eOutputFile, /* argument type */
                    "-" /* default value*/
                    );
            /* optional argument for min size */
            arg_desc->AddOptionalKey(
                    "m",
                    "min_size",
                    "min size inclusive",
                    CArgDescriptions::eInteger
                    );
            /* optional argument for max size */
            arg_desc->AddOptionalKey(
                    "M",
                    "max_size",
                    "max size inclusive",
                    CArgDescriptions::eInteger
                    );
            
            /* add simple flag to inverse selection */
            arg_desc->AddFlag("v", 
                "inverse selection",
                true
                );
            
            /* push this command args */
            SetupArgDescriptions( arg_desc );
            }   
        
        /* class destructor */
        virtual ~FastaFilterSize()
            {
            }
        
        /* workhorse of the program */
        virtual int Run()
            {
            /* get output stream */
            CNcbiOstream& out =  this->GetArgs()["o"].AsOutputFile();
            /* create a FASTA writer */
            CFastaOstream* writer = new CFastaOstream(out);
            /* shall we inverse the selection ? */
            bool invert=this->GetArgs()["v"];

            /* get input stream */
            CNcbiIstream& input_stream  = this->GetArgs()["i"].AsInputFile();
            /* init min and max inclusive */
            TSeqPos min_inclusive=0;
            TSeqPos max_inclusive= std::numeric_limits<TSeqPos>::max();
            /* if min and max set by user */
            if (this->GetArgs()["m"])
                {
                min_inclusive = this->GetArgs()["m"].AsInteger();
                }
            if (this->GetArgs()["M"])
                {
                max_inclusive = this->GetArgs()["M"].AsInteger();
                }
            /* create a FASTA parser */
            CFastaReader reader(input_stream ,
                    CFastaReader::fNoParseID /* http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/fasta_8hpp_source.html */
                    );
            /* while we can read something */
            while( !reader.AtEOF() )
                {
                /* read next sequence */
                CRef<CSeq_entry> seq(reader.ReadOneSeq());
                
                /* should we print the sequence ?*/
                if((seq->GetSeq().GetLength()>= min_inclusive && 
                    seq->GetSeq().GetLength()<= max_inclusive
                    )!= invert)
                    {
                    /* yes, print */
                    writer->Write(*seq);
                    }
                }
            /* cleanup */
            delete writer;
            out.flush();
            return 0;
            }
    };

int main(int argc,char** argv)
    {
    return FastaFilterSize().AppMain(
        argc,
        argv,
        0, /* envp Environment pointer */
        eDS_ToStderr,/* log message. In /c++/include/corelib/ncbidiag.hpp  */
        0, /* Specify registry to load, as per LoadConfig() */
        "filterfasta" /* Specify application name */
        );
    }

Compiling

g++  -std=gnu++11  -finline-functions -fstrict-aliasing -fomit-frame-pointer    
-I/commun/data/users/lindenb/src/blast2.2.30/ncbi-blast-2.2.30+-src/c++/include 
-I/commun/data/users/lindenb/src/blast2.2.30/ncbi-blast-2.2.30+-src/c++/ReleaseM
T/inc  -Wno-deprecated -Wno-deprecated-declarations -o filtersize filtersize.cpp
 -L/commun/data/users/lindenb/src/blast2.2.30/ncbi-blast-2.2.30+-src/c++/Release
MT/lib -lblast_app_util -lblastinput-static -lncbi_xloader_blastdb-static -lncbi
_xloader_blastdb_rmt-static -lxblastformat-static -lalign_format-static -ltaxon1
-static -lblastdb_format-static -lgene_info-static -lxalnmgr-static -lblastxml-s
tatic -lblastxml2-static -lxcgi-static -lxhtml-static -lxblast-static -lxalgobla
stdbindex-static -lcomposition_adjustment-static -lxalgodustmask-static -lxalgow
inmask-static -lseqmasks_io-static -lseqdb-static -lblast_services-static -lxobj
util-static -lxobjread-static -lvariation-static -lcreaders-static -lsubmit-stat
ic -lxnetblastcli-static -lxnetblast-static -lblastdb-static -lscoremat-static -
ltables-static -lxalnmgr-static -lncbi_xloader_genbank-static -lncbi_xreader_id1
-static -lncbi_xreader_id2-static -lncbi_xreader_cache-static -lxconnext-static 
-lxconnect-static -ldbapi_driver-static -lncbi_xreader-static -lxconnext-static 
-lxconnect-static -lid1-static -lid2-static -lseqsplit-static -lxcompress-static
 -lxobjmgr-static -lgenome_collection-static -lseqedit-static -lseqset-static -l
seq-static -lseqcode-static -lsequtil-static -lpub-static -lmedline-static -lbib
lio-static -lgeneral-static -lxser-static -lxutil-static -lxncbi-static -lgomp -
lz -lbz2 -ldl -lnsl -lrt -lm -lpthread -L/commun/data/users/lindenb/src/blast2.2
.30/ncbi-blast-2.2.30+-src/c++/ReleaseMT/lib -lblast_app_util -lblastinput-stati
c -lncbi_xloader_blastdb-static -lncbi_xloader_blastdb_rmt-static -lxblastformat
-static -lalign_format-static -ltaxon1-static -lblastdb_format-static -lgene_inf
o-static -lxalnmgr-static -lblastxml-static -lblastxml2-static -lxcgi-static -lx
html-static -lxblast-static -lxalgoblastdbindex-static -lcomposition_adjustment-
static -lxalgodustmask-static -lxalgowinmask-static -lseqmasks_io-static -lseqdb
-static -lblast_services-static -lxobjutil-static -lxobjread-static -lvariation-
static -lcreaders-static -lsubmit-static -lxnetblastcli-static -lxnetblast-stati
c -lblastdb-static -lscoremat-static -ltables-static -lxalnmgr-static -lncbi_xlo
ader_genbank-static -lncbi_xreader_id1-static -lncbi_xreader_id2-static -lncbi_x
reader_cache-static -lxconnext-static -lxconnect-static -ldbapi_driver-static -l
ncbi_xreader-static -lxconnext-static -lxconnect-static -lid1-static -lid2-stati
c -lseqsplit-static -lxcompress-static -lxobjmgr-static -lgenome_collection-stat
ic -lseqedit-static -lseqset-static -lseq-static -lseqcode-static -lsequtil-stat
ic -lpub-static -lmedline-static -lbiblio-static -lgeneral-static -lxser-static 
-lxutil-static -lxncbi-static -lgomp -lz -lbz2 -ldl -lnsl -lrt -lm -lpthread

Testing

get help

$ ./filtersize  -help
USAGE
  filtersize [-h] [-help] [-xmlhelp] [-i input_file_name]
    [-o output_file_name] [-m min_size] [-M max_size] [-v]
    [-logfile File_Name] [-conffile File_Name] [-version] [-version-full]
    [-dryrun]

DESCRIPTION
   A program filtering fasta sequences on size

OPTIONAL ARGUMENTS
 (...)
 -i <File_In>
   input file name
   Default = `-'
 -o <File_Out>
   output file name
   Default = `-'
 -m <Integer>
   min size inclusive
 -M <Integer>
   max size inclusive
 -v
   inverse selection
(...)

test with some fasta sequences:

echo -e  ">seq1\nAATCGTACGCTAG\n>seq2\nAATCGTACGATGCAAAAAAAAAAAATCTAG\n>seq3\nAATC\n>seq3\nAATC" |\
./filtersize -m 10 -M 20

>lcl|1 seq1
AATCGTACGCTAG
echo -e  ">seq1\nAATCGTACGCTAG\n>seq2\nAATCGTACGATGCAAAAAAAAAAAATCTAG\n>seq3\nAATC\n>seq3\nAATC" |\
./filtersize -m 10 -M 20 -v 

>lcl|2 seq2
AATCGTACGATGCAAAAAAAAAAAATCTAG
>lcl|3 seq3
AATC
>lcl|4 seq3
AATC

That's it,
Pierre

29 January 2015

Compiling a C++ 'Hello world' program using the #NCBI C++ toolbox: my notebook.

This post is my notebook for compiling a simple C++ application using the NCBI C++ toolbox (http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/).

This application prints 'Hello world' and takes two arguments:

  • '-o' to specificiy the output filename (default is standard output)
  • '-n' to set the name to be printed (default: "Word !")

The code I used is the one containing in the distribution of blast 2.2.30

The program itself is a C++ Class, it extends the class CNcbiApplication

(...)
class HelloApp : public CNcbiApplication
    {
    (...)
    }

In the constructor, we just set the name and the version of the program:

HelloApp::HelloApp()
    {
    CRef<CVersion> version(new CVersion());
    version->SetVersionInfo(1, 0, 0);
    this->SetFullVersion(version);
    }

The destructor is void

HelloApp::~HelloApp()
    {
    }

an HelloApp::Init() method is used to register the command line arguments.

void HelloApp::Init()
    {
    CArgDescriptions* arg_desc = new CArgDescriptions ;
    arg_desc->SetUsageContext(
        GetArguments().GetProgramBasename(),
        "Hello NCBI"
        );
    (...)

in HelloApp::Init() we register the '-o' argument:

arg_desc->AddDefaultKey(
    "o",/* name */
    "output_file_name",/* synopsis */
    "output file name",/* comment */
    CArgDescriptions::eOutputFile, /* argument type */
    "-" /* default value*/
    );

and the '-n' argument:

arg_desc->AddDefaultKey(
    "n",/* name */
    "name",/* synopsis */
    "Name",/* comment */
    CArgDescriptions::eString, /* argument type */
    "World !" /* default value*/
    );

once the two arguments have been registered, the new command line is pushed in the application:

SetupArgDescriptions( arg_desc );

the HelloApp::Run() method is the workhorse of the program. Here, we get the output stream and we print the name.

int HelloApp::Run()
    {
    /* get output stream */
    CNcbiOstream& out =  this->GetArgs()["o"].AsOutputFile();
    out << "Hello " << this->GetArgs()["n"].AsString() << endl;
    return 0;
    }

All in one:

#include <memory>
#include <limits>
#include "corelib/ncbiapp.hpp"
#include <ctype.h>



USING_NCBI_SCOPE;

/**
 * Hello NCBI
 *      testing NCBI C++ toolbox
 *  Motivation:
 *      filter sequences on size
 *  Author:
 *      Pierre Lindenbaum PhD 2015
 *
 */
class HelloApp : public CNcbiApplication /* extends a basic NCBI application defined in c++/include/corelib/ncbiapp.hpp */
    {
    public:

        /* constructor, just set the version to '1.0.0'*/
        HelloApp()
            {
            CRef<CVersion> version(new CVersion());
            version->SetVersionInfo(1, 0, 0);
            this->SetFullVersion(version);
            }
            
            
        /* called to initialize rge program.
        *  The default behavior of this in 'CNcbiApplication' is "do nothing".
        *  Here, we set the command line arguments.
        */
        virtual void Init()
            {
            CArgDescriptions* arg_desc = new CArgDescriptions ; /* defined in /c++/include/corelib/ncbiargs.hpp */
            arg_desc->SetUsageContext(
                GetArguments().GetProgramBasename(),
                "Hello NCBI"
                );
            
           /* argument for output file */
           arg_desc->AddDefaultKey(
                    "o",/* name */
                    "output_file_name",/* synopsis */
                    "output file name",/* comment */
                    CArgDescriptions::eOutputFile, /* argument type */
                    "-" /* default value*/
                    );
            /* argument for name */
           arg_desc->AddDefaultKey(
                    "n",/* name */
                    "name",/* synopsis */
                    "Name",/* comment */
                    CArgDescriptions::eString, /* argument type */
                    "World !" /* default value*/
                    );
            
            /* push this command args */
            SetupArgDescriptions( arg_desc );
            }   
        
        /* class destructor */
        virtual ~HelloApp()
            {
            }
        
        /* workhorse of the program */
        virtual int Run()
            {
            /* get output stream */
            CNcbiOstream& out =  this->GetArgs()["o"].AsOutputFile();
            out << "Hello " << this->GetArgs()["n"].AsString() << endl;
            return 0;
            }
    };

int main(int argc,char** argv)
    {
    return HelloApp().AppMain(
        argc,
        argv,
        0, /* envp Environment pointer */
        eDS_ToStderr,/* log message. In /c++/include/corelib/ncbidiag.hpp  */
        0, /* Specify registry to load, as per LoadConfig() */
        "hello" /* Specify application name */
        );
    }

Compiling

there is a bunch of libaries available under ncbi-blast-2.2.30+-src/c++/ReleaseMT/lib and it's hard to know which are the one required for linking. I put everything in my command line. All in one it looks like:

g++  -std=gnu++11  -finline-functions -fstrict-aliasing -fomit-frame-pointer    
-Iblast2.2.30/ncbi-blast-2.2.30+-src/c++/include -Iblast2.2.30/ncbi-blast-2.2.30
+-src/c++/ReleaseMT/inc  -Wno-deprecated -Wno-deprecated-declarations -o hello h
ello.cpp -Lblast2.2.30/ncbi-blast-2.2.30+-src/c++/ReleaseMT/lib -lblast_app_util
 -lblastinput-static -lncbi_xloader_blastdb-static -lncbi_xloader_blastdb_rmt-st
atic -lxblastformat-static -lalign_format-static -ltaxon1-static -lblastdb_forma
t-static -lgene_info-static -lxalnmgr-static -lblastxml-static -lblastxml2-stati
c -lxcgi-static -lxhtml-static -lxblast-static -lxalgoblastdbindex-static -lcomp
osition_adjustment-static -lxalgodustmask-static -lxalgowinmask-static -lseqmask
s_io-static -lseqdb-static -lblast_services-static -lxobjutil-static -lxobjread-
static -lvariation-static -lcreaders-static -lsubmit-static -lxnetblastcli-stati
c -lxnetblast-static -lblastdb-static -lscoremat-static -ltables-static -lxalnmg
r-static -lncbi_xloader_genbank-static -lncbi_xreader_id1-static -lncbi_xreader_
id2-static -lncbi_xreader_cache-static -lxconnext-static -lxconnect-static -ldba
pi_driver-static -lncbi_xreader-static -lxconnext-static -lxconnect-static -lid1
-static -lid2-static -lseqsplit-static -lxcompress-static -lxobjmgr-static -lgen
ome_collection-static -lseqedit-static -lseqset-static -lseq-static -lseqcode-st
atic -lsequtil-static -lpub-static -lmedline-static -lbiblio-static -lgeneral-st
atic -lxser-static -lxutil-static -lxncbi-static -lgomp -lz -lbz2 -ldl -lnsl -lr
t -lm -lpthread -Lblast2.2.30/ncbi-blast-2.2.30+-src/c++/ReleaseMT/lib -lblast_a
pp_util -lblastinput-static -lncbi_xloader_blastdb-static -lncbi_xloader_blastdb
_rmt-static -lxblastformat-static -lalign_format-static -ltaxon1-static -lblastd
b_format-static -lgene_info-static -lxalnmgr-static -lblastxml-static -lblastxml
2-static -lxcgi-static -lxhtml-static -lxblast-static -lxalgoblastdbindex-static
 -lcomposition_adjustment-static -lxalgodustmask-static -lxalgowinmask-static -l
seqmasks_io-static -lseqdb-static -lblast_services-static -lxobjutil-static -lxo
bjread-static -lvariation-static -lcreaders-static -lsubmit-static -lxnetblastcl
i-static -lxnetblast-static -lblastdb-static -lscoremat-static -ltables-static -
lxalnmgr-static -lncbi_xloader_genbank-static -lncbi_xreader_id1-static -lncbi_x
reader_id2-static -lncbi_xreader_cache-static -lxconnext-static -lxconnect-stati
c -ldbapi_driver-static -lncbi_xreader-static -lxconnext-static -lxconnect-stati
c -lid1-static -lid2-static -lseqsplit-static -lxcompress-static -lxobjmgr-stati
c -lgenome_collection-static -lseqedit-static -lseqset-static -lseq-static -lseq
code-static -lsequtil-static -lpub-static -lmedline-static -lbiblio-static -lgen
eral-static -lxser-static -lxutil-static -lxncbi-static -lgomp -lz -lbz2 -ldl -l
nsl -lrt -lm -lpthread

Testing

run without arguments:

$ ./hello 
Hello World !

run with arguments:

$ ./hello -n Pierre
Hello Pierre
$ ./hello -n Pierre -o test.txt && cat test.txt 
Hello Pierre

errors:

$ ./hello -x Pierre
USAGE
  hello [-h] [-help] [-xmlhelp] [-o output_file_name] [-n name]
    [-logfile File_Name] [-conffile File_Name] [-version] [-version-full]
    [-dryrun]

DESCRIPTION
   Hello NCBI

Use '-help' to print detailed descriptions of command line arguments
========================================================================

Error: Unknown argument: "x"

get help:

$ ./hello -help
USAGE
  hello [-h] [-help] [-xmlhelp] [-o output_file_name] [-n name]
    [-logfile File_Name] [-conffile File_Name] [-version] [-version-full]
    [-dryrun]

DESCRIPTION
   Hello NCBI

OPTIONAL ARGUMENTS
 -h
   Print USAGE and DESCRIPTION;  ignore all other parameters
 -help
   Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters
 -xmlhelp
   Print USAGE, DESCRIPTION and ARGUMENTS in XML format; ignore all other
   parameters
 -o <File_Out>
   output file name
   Default = `-'
 -n <String>
   Name
   Default = `World !'
 -logfile <File_Out>
   File to which the program log should be redirected
 -conffile <File_In>
   Program's configuration (registry) data file
 -version
   Print version number;  ignore other arguments
 -version-full
   Print extended version data;  ignore other arguments
 -dryrun
   Dry run the application: do nothing, only test all preconditions

get xml help:

$ ./hello -xmlhelp | xmllint --format -
<?xml version="1.0"?>
<ncbi_application xmlns="ncbi:application" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="ncbi:application ncbi_application.xsd">
  <program type="regular">
    <name>hello</name>
    <version>1.0.0</version>
    <description>Hello NCBI</description>
  </program>
  <arguments>
    <key name="conffile" type="File_In" optional="true">
      <description>Program's configuration (registry) data file</description>
      <synopsis>File_Name</synopsis>
    </key>
    <key name="logfile" type="File_Out" optional="true">
      <description>File to which the program log should be redirected</description>
      <synopsis>File_Name</synopsis>
    </key>
    <key name="n" type="String" optional="true">
      <description>Name</description>
      <synopsis>name</synopsis>
      <default>World !</default>
    </key>
    <key name="o" type="File_Out" optional="true">
      <description>output file name</description>
      <synopsis>output_file_name</synopsis>
      <default>-</default>
    </key>
    <flag name="dryrun" optional="true">
      <description>Dry run the application: do nothing, only test all preconditions</description>
    </flag>
    <flag name="h" optional="true">
      <description>Print USAGE and DESCRIPTION;  ignore all other parameters</description>
    </flag>
    <flag name="help" optional="true">
      <description>Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters</description>
    </flag>
    <flag name="version" optional="true">
      <description>Print version number;  ignore other arguments</description>
    </flag>
    <flag name="version-full" optional="true">
      <description>Print extended version data;  ignore other arguments</description>
    </flag>
    <flag name="xmlhelp" optional="true">
      <description>Print USAGE, DESCRIPTION and ARGUMENTS in XML format; ignore all other parameters</description>
    </flag>
  </arguments>
</ncbi_application>

That's it,
Pierre