Difference between revisions of "GMOD Middleware"

From GMOD
Jump to: navigation, search
(HBM Example CVTerm)
(Presentation by Chinmay Patel)
Line 2,107: Line 2,107:
  
 
====Presentation by Chinmay Patel====
 
====Presentation by Chinmay Patel====
 +
 +
GeneDB
 +
 +
* GeneDB is the organism data resource and annotation database for the Pathogen Sequencing Unit (PSU) at the Sanger Institute, UK
 +
* Contains 37 organisms, which is expected to grow to 62
 +
* Currently migrating to chado schema
 +
* Java API with two engines Hibernate & iBatis
 +
 +
=====Technical - Connections=====
 +
 +
Connections are configured in the Spring configuration file
 +
<xml>
 +
        <bean id="dataSource" class="org.apache.commons.dbcp.BasicDataSource">
 +
 +
              <property name="driverClassName" value="org.postgresql.Driver" />
 +
 +
          <property name="url" value="jdbc:postgresql://holly.sanger.ac.uk:5432/chado" />
 +
 +
              <property name="username" value="DELIBERATELY_BOGUS_NAME"/>
 +
 +
              <property name="password" value="WIBBLE" />
 +
 +
    </bean>
 +
</xml>
 +
* Uses a connection pool
 +
* Connection to the database is specified graphically, so the iBatis configuration file has  variables for the location:
 +
<xml>
 +
<property name="JDBC.Driver" value="org.postgresql.Driver"/>
 +
 +
<property name="JDBC.ConnectionURL” value="jdbc:postgresql://${chado}"/>
 +
 +
<property name="JDBC.Username" value="${username}"/>
 +
 +
<property name="JDBC.Password" value="${password}"/>
 +
</xml>
 +
 +
* provide database location, username & password
 +
* select from scrollable list of feature with residues (organisms in separate Postgres schemas) what to open in Artemis
 +
 +
=====Technical - Code Generation=====
 +
 +
* The shared interface and hibernate implementation  were originally generated
 +
* There’s no explicit code generation (although the  Spring and Hibernate runtimes may use them  behinds the scenes)
 +
 +
=====Technical - Transactions=====
 +
 +
* Transactions are fully supported
 +
* There’s no explicit code generation (although the  Spring and Hibernate runtimes may use them behind  the scenes)
 +
 +
=====Demo – Sample Problem=====
 +
 +
Creating a gene
 +
<java>
 +
genes[0] = new Feature(ORG, GENE, "xfile", false, false, now, now);
 +
 +
genes[0].setSeqLen(1029);                                           
 +
sequenceDao.persist(genes[0]);
 +
 +
FeatureLoc loc = new FeatureLoc(SOURCE_FEATURE, genes[0], 13691, false, 14720, false, (short)1, 0, 0 ,0);
 +
 +
sequenceDao.persist(loc);
 +
 +
addFeatureProp(genes[0], "description", "A test gene for GMOD meeting");
 +
 +
addSynonymsToFeature(genes[0], "mulder", "scully");
 +
 +
createExon("exon1", genes[0], 13691, 13767, now, 0);
 +
 +
createExon("exon2", genes[0], 14687, 14720, now, 1);
 +
</java>
 +
 +
Retrieve a gene
 +
<java>
 +
Feature f = sequenceDao.getFeatureByUniqueName("xfile");
 +
displayGene(f);
 +
</java>
 +
 +
Update a gene
 +
<java>
 +
genes[0].setUniqueName("x-file");
 +
 +
sequenceDao.merge(genes[0]);
 +
</java>
 +
 +
=====Demo – Sample Problem=====
 +
 +
<java>
 +
private Feature createExon(String name, Feature gene, int min, int max, Timestamp now, int rank) {
 +
 +
        Feature exon = new Feature(ORG, EXON, name, false, false, now, now);
 +
        exon.setSeqLen(max-min);
 +
        sequenceDao.persist(exon);
 +
 +
        FeatureLoc loc = new FeatureLoc(SOURCE_FEATURE, exon, min, false, max, false,     
 +
                                      (short)1, 0, 0 ,0);
 +
        sequenceDao.persist(loc);
 +
 +
        return exon;
 +
 +
}
 +
</java>
 +
 +
=====Demo – Sample Problem=====
 +
 +
<xml>
 +
<st:section name="Naming" id="gene_naming" collapsed="false" collapsible="false"
 +
hideIfEmpty="true">
 +
  <dl>
 +
            <dt><b>symbol:</b></dt>
 +
            <dd>${feature.uniqueName}</dd>
 +
  </dl>
 +
  <db:synonym name="synonym" var="name" collection="${feature.featureSynonyms}">
 +
    <br /><b>Synonym:</b> <db:list-string collection="${name}" />
 +
  </db:synonym>
 +
  <dt><b>Type:</b></dt>
 +
  <dd>${feature.cvTerm.name}</dd>
 +
       
 +
<st:section name="Exons" collapsed="false" collapsible="true" hideIfEmpty="true">
 +
            <display:table name="exons" uid="tmp" pagesize="30" class="simple" cellspacing="0"
 +
cellpadding="4">
 +
                <display:column property="uniqueName" title="Exon"/>
 +
                <display:column property="featureLocsForSrcFeatureId.fmin" title="Start"/>
 +
                <display:column property="featureLocsForSrcFeatureId.fmax" title="end"/>
 +
            </display:table>
 +
</st:section>
 +
 +
<st:section name="cds" collapsible="true">
 +
            <b>${feature.residues}</b>
 +
</st:section>
 +
</xml>
  
 
===Conclusions===
 
===Conclusions===

Revision as of 21:32, 23 January 2007

Contents

Middleware for Chado databases

Ongoing Documentation

This page has been temporarily created by the GMOD developers for the purposes of discussing Chado and middleware for Chado. It will be moved to a permanent location shortly.

Middleware Evaluation January 2007

A group of some 50 GMOD developers met at the annual meeting to discuss middleware. This one day meeting had the following general goals:

  • To educate GMOD programmers on methods and practices for Middleware
  • To facilitate discussion on the best methods
  • To guide GMOD to a uniform Middleware layer
  • To generate this central reference document for Middleware projects, including:
    • Platform information
    • Strengths & weaknesses of different Middleware packages
    • Specific examples of how one would use a given middleware package

Introduction

One of the key characteristics of the GMOD software project is the variety of approaches and components that it supports. This applies to applications, database schemas, as well as to middleware, a software layer that mediates the exchange of data between the applications and the databases. Despite this diversity certain applications and schemas have emerged as key supported components in GMOD, such as the GBrowse application and the Chado schema, to name just two. However, a consensus view has not emerged with respect to middleware, and there are certainly a number of different middleware packages that have been used in the GMOD world, coming from within this world and from the larger world of open source.

In late 2006 the GMOD developers took note of the large number of middleware packages in use and elected to embark on a short-term study to evaluate and compare these packages. The primary motivation here was to select or recommend certain packages over others specifically within the GMOD context. The assumption is that making such recommendations will serve to focus the developers' effort on a smaller number of packages. Clearly it's also assumed that such a focus will inevitably lead to greater support for and use of those recommended packages, and that all will GMOD will benefit.

Another purpose of this study is to educate GMOD programmers on best practices concerning the use and development of middleware. It's expected that common agreement on these practices will lead to the development of more effective software as well as the best use of the software in practice. Finally, this study should generate a central reference document on these different middleware packages used in GMOD. This reference will contain platform- and language-specific information as well as descriptions of the strengths and weaknesses of the packages that can be used by GMOD developers when considering middleware.

General Evaluation Criteria

The GMOD developers proposed that each presenter provide some basic information about each middleware package, both general and technical. In addition each middleware application was asked to address a set of sample problems, shown below. These example problems are thought to typify some of the common functions that the scientist may need when working with their own database. It was understood that not all software would be able to handle all aspects of the sample problems and this demonstration was not intended to be live.

Problem 1

Enter the information about the following three novel genes, including the associated mRNA structures, into your database. Print the assigned feature_id for each inserted gene.

Note:

  • The coordinates are given in exact coordinates.
  • Use the organism_id for your organism
  • Store description in the chado table featureprop
  • A sequence in fasta format (see Fake Chromosome) should be loaded as genomic sequence, either chromosome or contig -- this will be used as a srcfeature in featureloc

Gene descriptions:

symbol: xfile
synonyms: mulder, scully
description: A test gene for GMOD meeting
mRNA Feature
     exon_1: 
       start: 13691
       end: 13767
       strand: 1
       srcFeature_id: Id of genomic sample
     exon_2: 
       start: 14687
       end: 14720
       strand: 1
       srcFeature_id: Id of genomic sample

symbol: x-men
synonyms: wolverine
  mRNA Feature
     exon_1: 
       start: 12648
       end: 13136
       strand: 1
       srcFeature_id: Id of genomic sample

symbol: x-ray
synonyms: none
     exon: 
       start: 1703
       end: 1900
       strand: 1
       srcFeature_id: Id of genomic sample
Problem 2

Retrieve and print the following report for gene xfile (the coding sequence and exon coordinates are derived from the associated mRNA feature). The results should resemble the following:

symbol: xfile
synonyms: mulder, scully
description: A test gene for GMOD meeting
type: gene
exon1 start: 13691
exon1 end: 13767
exon2 start: 14687
exon2 end: 14720
>xfile cds 
ATGGCGTTAGTATTCATGGTTACTGGTTTCGCTACTGATATCACCCAGCGTGTAGGCTGT
GGAATCGAACACTGGTATTGTATAAATGTTTGTGAATACACTGAGAAATAA
Problem 3

Update the gene xfile: change the name symbol to x-file and retrieve the changed record. Regenerate the report from Problem 1. The results should resemble the following:

symbol: x-file
synonyms: mulder, scully
description: A test gene for GMOD meeting
type: gene
exon1 start: 13691
exon1 end: 13767
exon2 start: 14687
exon2 end: 14720
>x-file cds
ATGGCGTTAGTATTCATGGTTACTGGTTTCGCTACTGATATCACCCAGCGTGTAGGCTGT
GGAATCGAACACTGGTATTGTATAAATGTTTGTGAATACACTGAGAAATAA
Problem 4

Search for all genes with symbols starting with x-. With the results produce the following simple result list (organism will vary):

1323    x-file     Xenopus laevis
1324    x-men   Xenopus laevis
1325    x-ray     Xenopus laevis
Problem 5

Delete the gene x-ray using the geneId. Run the search and report in Problem 4 again to show the delete has taken place, with a result resembling the following:

1323    x-file     Xenopus laevis
1324    x-men   Xenopus laevis

Object-Relational Mapping Principles

Presentation by Sohel Merchant

Sohel Merchant, Bioinformatics Software Engineer at dictyBase, Center for Genetic Medicine, Northwestern University, Chicago. This is an edited version of Sohel's presentation.

Outline
  • The Problem
  • Solutions
  • ORM
  • Perl – Class::DBI
  • Summary
The Problem
  • Developers need to perform Create, Retrieve, Update, Delete (aka CRUD) operations on data inside an application.
  • The real world objects represented using a programming language needs to be stored in databases
  • Using relational databases to store object-oriented data leads to a semantic gap
  • RDBMS have fixed types, but OO can have more complicated user defined types.
Solutions
  • Data Access Object (DAO)
  • Developer writes a class which contains one attribute for each field in the table
  • Methods for CRUD typically contains JDBC/DBI code with the necessary SQL statements.
  • Object Relational Mapping (ORM), WikiPedia:
    • “ORM is a programming technique that links databases to object-oriented language concepts, creating (in effect) a virtual object database.“
  • Developer needs to configure the ORM
  • Less amount of manual coding
  • CRUD methods are automatically generated by the ORM layer
ORM

ORM solutions

  • Perl
    • Class::DBI
  • Java
    • EJB
    • Hibernate
    • JDO
    • iBatis
Perl - Class::DBI
  • Provides a simple interfaces for wrapping Perl classes around a database tables
  • Tables are mapped directly to objects
  • The table column name are mapped to the get/set methods
  • Can be used with transactions
Class::DBI

Defining a class in Class::DBI to represent a table:

  CVTERM
  cvterm_id
  cv_id
  name
  definition
  dbxref_id

Corresponding code:

<perl> package Chado::Cvterm; use base 'Chado::DBI'; Chado::Cvterm->set_up_table('Cvterm'); </perl>

Class::DBI - CRUD

<perl>

    1. Create

$term_dbobj = Chado::Cvterm->create({

                              name      => ”DUMMY TERM”,
                              cv_id     => 1,
                              dbxref_id => 125
                       });
    1. Retrieve

$term_dbobj = Chado::Cvterm->retrieve(2);

    1. Update

$term_dbobj->name( $term->name() ); $term_dbobj->definition( $term->definition );

    1. Delete

$term_dbobj->delete(); </perl>

Java - Hibernate
  • Hibernate maps Java Objects directly to database tables
  • Scalable
  • Works well for controlled Data model
Java - iBatis
  • iBATIS maps Java Objects to the results of SQL Queries
  • XML definitions for queries
  • Queries and managing Maps
  • Transactions
  • Good fit for existing database schema
Summary
  • ORM provides painless roundtrip of data between the application and database.
  • Reduces the amount of SQL code and allows a programmatic style interface to the RDBMS
  • Choice of ORM solution depends on the type of project

XORT

Background

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

Additional demonstrations of what your software does well.

Limitations

Presentation by Pinglei Zhou and Josh Goodman

Generic Modern Language Database?

We speak many languages with different people, for example:

  • My Parents: hometown dialect
  • My wife: Mandarin (official form of Chinese)
  • My son: Plain English
  • My colleagues: ‘chadoXML’

ChadoXML is the common language in the FlyBase and Chado world.

Introduction
  • An XML-database mapping system for data exchange between DB and XML-driven application
  • Developed/Supported by Pinglei Zhou at FlyBase Harvard, 0.007 version now.
  • Used: All FlyBase sites
  • Written in Perl
  • Required perl modules:
    • XML::Parser::PerlSAX
    • Unicode::String
    • XML::DOM
    • DBI
Components
  • Database & Schema
  • ChadoXML Specification
  • DumpSpec collections
  • Tools
Highlights of Chado XML Specification
  • Unique represent of specific database schema
  • Get away with those internal primary key value
  • Static vs. Operational
  • Encoding for non-ASCII characters
  • Macro mechanism (object reference)
Putting it together: New FlyBase dataflow Part 1

1.a. Proforma (FlyBase Cambridge) is converted to ChadoXML

1.b. ChadoXML is created by Apollo (Harvard)

1.c. ChadoXML is created by Java SEAN (Harvard)

2. All ChadoXML is loaded into Chado by XORT

Putting it together: New FlyBase dataflow Part 2

3. Chado (Harvard) is denormalized and loaded into Chado (Indiana)

4. ChadoXML is created from Chado using XORT

5.a. GFF and Fasta is created from ChadoXML

5.b. HTML is created from Chado XML

Data & Report Generation
  • Content of all output files is controlled by XML dumpspecs.
    • Dumpspecs are language independent.
    • Easily readable (with knowledge of Chado structure).
  • All XML transformation steps are done with XSLT v2.
    • Saxon XSLT (http://saxon.sourceforge.net/)
    • ChadoXML is split into individual chunks before XSLT processing to accommodate large file sizes.
    • Extremely fast. We can process all data for ~60,000 Drosophila genes in under 30 minutes.
Hibernate & XORT
  • Hibernate didn't scale well when dealing with 5,000+ features in bulk.
    • The test was simply calling print() statements
  • Performance tweaks for Hibernate can be quite complicated to setup for bulk operations.
  • XORT is currently handling ~6 million features in production with only minor performance problems.
  • XORT is much more language independent.
Support for complex transactions using XORT

For example:

  • Find all records linked to a record using dumpspec
  • Merge gene x into y, each with thousands of records attached

Step 1. Dump all data use simple dumpspec

<chado>
 <feature dump=“all”>
  <uniquename test=“eq”>x</uniquename>
 </feature>
</chado>

Step 2 Delete feature x from DB, with triggers to clean orphan records, if necessary

Step 3. Edit the output xml, change uniquename x to y, then load the edited file back to DB

CHIA (Chado Interface Application)

A Java application that organizes SQL and XORT functionality for internal users, e.g.:

  • Dump chado-XML for gene regions for Apollo curation
  • Organize and execute “canned” SQL queries
  • Serve IDs for curators (in development)
  • Dynamic browser Chado without writing SQL statement

CHIA is being designed to be extensible for adding new functionality as needed.

Limitations
  • DB Schema follow certain rules
    • All have internal int primary key
    • All have unique key(s)
  • It may take long path to retrieve certain type of data
    • Example: gene->allele->genotype->phenotype via feature_relationship
  • Structure not store in memory, you flush out data as it goes
Documentation
  • Using Chado to Store Genome Annotation Data
    • Current Protocols in Bioinformatics (Baxevanis, A.D., and Davison, D.B., eds) 2, 9.6.1-9.6.28.
  • XORT specification docs
  • XORT draft (unpublished)
  • GMOD case demo procedure
Acknowledgements
  • Willian Gelbart
  • Chris Mungall
  • David Emmert
  • Mark Gibson
  • Stan Letovsky
  • Nomi Harris
  • Frank Smutniak
  • Suzanna Lewis
  • Peili Zhang
  • Stan Letovsky
  • Haiyan Zhang
  • Aubrey de Grey
  • Andy Schroeder
  • Don Gilbert
  • Susan Russo
  • Mark Zythovicz
  • Scott Cain
  • Lincoln Stein
  • Victor Strelets
  • Robert Wilson
  • Paul Leyland

Chado::AutoDBI

Background

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

  • Demonstrations of what your software does well

Limitations

Presentation by Brian O'Connor

Project Overview
  • Who wrote/supports it?
    • Chado::AutoDBI: Allen Day, Scott Cain, Brian O'Connor, & others
    • Turnkey: Allen Day, Scott Cain, Brian O'Connor
  • Third party code used?
    • Based on Class::DBI: Michael Schwern & Tony Bowden
Technical Overview
  • Code Generation


Project Overview

Convert SQL Queries/Inserts/Deletes -> Object Calls

 INSERT INTO feature (organism_id, name)
                                 VALUES (1, 'foo');

To:

<perl>

   my $feature = Turnkey::Model::Feature->find_or_create({
                      organism_id => $organism,
   		       name => 'xfile', uniquename => 'xfile',
   		       type_id => $mrna_cvterm,
   		       is_analysis => 'f', is_obsolete => 'f'
   		       });

</perl>

Technical Overview
  • Database Connection: use a base class

<perl> use base qw(Class::DBI::Pg);

my ($dsn, $name, $pass); $dsn = "dbi:Pg:host=localhost;dbname=chado;port=5432"; $name = "postgres"; $pass = "";

Turnkey::Model::DBI->set_db('Main', $dsn, $name, $pass, {AutoCommit => 1}); </perl>

Technical Overview
  • Basic ORM Object: Feature

<perl> package Turnkey::Model::Feature; use base 'Turnkey::Model::DBI';

Turnkey::Model::Feature->set_up_table('feature');

  1. Primary key accessors

sub id { shift->feature_id } sub feature { shift->feature_id } </perl>

  • data field accessors by Class::Accessor
Technical Overview
  • Basic ORM Object: Feature
    • has_a

<perl>

  1. has_a

Turnkey::Model::Feature->has_a( type_id => "Turnkey::Model::Cvterm" ); sub cvterm { return shift->type_id; } </perl>

  • Basic ORM Object: Feature
    • has_many

<perl>

  1. has_many

Turnkey::Model::Feature->has_many('feature_synonym_feature_id',

          'Turnkey::Model::Feature_Synonym' => 'feature_id');

sub feature_synonyms { return shift->feature_synonym_feature_id; }

Turnkey::Model::Feature->has_many('featureprop_feature_id',

          'Turnkey::Model::Featureprop' => 'feature_id');

sub featureprops { return shift->featureprop_feature_id; } </perl>


Technical Overview
  • Basic ORM Object: Feature
    • skipping linker tables for has_many

<perl>

  1. skip over feature_synonym table
  2. method 1

sub synonyms { my $self = shift; return map $_->synonym_id, $self->feature_synonyms; }

  1. method 2

Turnkey::Model::Feature->has_many( synonyms2 =>

                     ['Turnkey::Model::Feature_Synonym' => 'synonym_id']);

</perl>

Technical Overview
  • Transactions

<perl>

 sub do_transaction {
   my $class = shift;
   my ( $code ) = @_;
   # Turn off AutoCommit for this scope.
   # A commit will occur at the exit of this block automatically,
   # when the local AutoCommit goes out of scope.
   local $class->db_Main->{ AutoCommit };
   # Execute the required code inside the transaction.
   eval { $code->() };
   if ( $@ ) {
     my $commit_error = $@;
     eval { $class->dbi_rollback }; # might also die!
     die $commit_error;
   }
 }

</perl>

Technical Overview
  • Lazy Loading

<perl> Turnkey::Model::Feature->columns( Primary => qw/feature_id/ ); Turnkey::Model::Feature->columns( Essential => qw/name organism_id type_id/ ); Turnkey::Model::Feature->columns( Others => qw/residues .../ ); </perl>

Typically:

<perl> Turnkey::Model::Feature->set_up_table('feature'); </perl>

Problem 1
  • Create Feature & Add Description

<perl>

  1. now create mRNA feature

my $feature = Turnkey::Model::Feature->find_or_create({

                      organism_id => $organism,
   		       name => 'xfile', uniquename => 'xfile',
   		       type_id => $mrna_cvterm,
   		       is_analysis => 'f', is_obsolete => 'f'
   		       });
   
  1. create description

my $featureprop = Turnkey::Model::Featureprop->find_or_create({

                          value => 'A test gene for GMOD meeting',
                          feature_id => $feature,
   			   type_id => $note_cvterm,
   			   });

</perl>

Problem 2
  • Retrieve a Feature via Searching

<perl>

  1. objects for global use
  1. the organism for our new feature

my $organism = Turnkey::Model::Organism->search(abbreviation => "S.cerevisiae")->next;

  1. the cvterm for a "Note"

my $note_cvterm = Turnkey::Model::Cvterm->retrieve(2);

  1. searching name by wildcard

my @results = Turnkey::Model::Feature->search_like(name => 'x-%'); </perl>

Problems 3, 4, & 5
  • Update a Feature

<perl>

 # update the xfile gene name
 
 $feature->name("x-file");
 $feature->update();

</perl>

  • Delete a Feature

<perl>

  1. now delete the x-file feature

$feature->delete(); </perl>

Special Topics
  • Things Chado::AutoDBI does well
    • easy to use
    • easy to port
      • other DBs
      • other platforms
    • autogenerated via Turnkey
    • find_or_create
Limitations
  • performance
  • joins & complex queries

<perl>

  1. Add the add_constructor for looking for name lengths

__PACKAGE__ ->add_constructor(long_names => qq{ length(name) > 15 });

  1. Custom SQL

__PACKAGE__->set_sql(xfiles => qq{

      SELECT FEATURE_ID
      FROM FEATURE
      WHERE NAME = 'xfiles' });

</perl>

For More Information

Modware

Background

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

  • Demonstrations of what your software does well

Limitations

Presentation by Eric Just

Eric Just, Senior Bioinformatics Scientist, dictyBase: http://dictybase.org Center for Genetic Medicine, Northwestern University. This is an edited version of Eric's presentation.

Why Modware Was Developed
  • Each feature type requires different behavior
  • Want to leave schema semantics out of application
  • Want to leverage work done in BioPerl
  • Re-use code developed for common use cases
What is in the Feature Table?

The core of Chado

  • Chromosome
  • Contig
  • Gene
  • mRNA
  • Exon
  • Lots of other things - See Sequence Ontology!
Modware Features
  • Multiple Feature classes
    • CHROMOSOME, GENE, MRNA, CONTIG
  • Each class provides type specific methods
  • Logic such as building exon structure of mRNA features is encapsulated
  • Parent class Modware::Feature
    • Provides common methods
    • Abstract factory for various feature types
Architectural Overview
  • Object-oriented Perl interface to Chado
  • Built on top of Chado::AutoDBI
  • Connection handled by GMOD
  • Database transactions supported
  • BioPerl used to represent and manipulate sequence and feature structure
  • ‘Lazy’ evaluation
Create and Insert Chromosome

<perl>

  my $seq_io = new Bio::SeqIO(
     -file   => "../data/fake_chromosome.txt",
     -format => 'fasta'
  );
  # Bio::SeqIO will return a Bio::Seq object which
  # Modware uses as its representation
  my $seq = $seq_io->next_seq();
  my $reference_feature = new Modware::Feature(
     -type            => 'chromosome',
     -bioperl         => $seq,
     -description     => "This is a test",
     -name            => 'Fake',
     -source          => 'GMOD 2007 Demo'
  );
  # Inserts chromosome into database
  $reference_feature->insert();

</perl>


Create and Insert a Gene

1) Enter the information about the following three novel genes, including the associated mRNA structures, into your database. Print the assigned feature_id for each inserted gene.

                        Gene Feature
                            symbol: x-ray
                            synonyms: none
                            mRNA Feature
                                exon:
                                    start: 1703
                                    end: 1900
                                    strand: 1
                                    srcFeature_id: Id of genomic sample


Create and Insert a Gene

1) Enter the information about the following three novel genes, including the associated mRNA structures, into your database. Print the assigned feature_id for each inserted gene.

                        Gene Feature
                           symbol: x-men
                           synonyms: wolverine
                           mRNA Feature
                               exon_1: 
                                   start: 12648
                                   end: 13136
                                   strand: 1
                                   srcFeature_id: 
                                       Id of genomic sample
Create and Insert a Gene

1) Enter the information about the following three novel genes, including the associated mRNA structures, into your database. Print the assigned feature_id for each inserted gene.

                     Gene Feature
                         symbol: xfile
                         synonyms: mulder, scully
                         description: A test gene for GMOD meeting
                         mRNA Feature
                             exon_1: 
                                 start: 13691
                                 end: 13767
                                 strand: 1
                                 srcFeature_id: Id of genomic sample
                             exon_2: 
                                 start: 14687
                                 end: 14720
                                  strand: 1
                                  srcFeature_id: Id of genomic sample
Create and Insert a Gene
      symbol: xfile
             synonyms: mulder, scully
             description: A test gene for GMOD meeting
     …

<perl> my $gene_feature = new Modware::Feature(

   -type           => 'gene',
   -name           => 'xfile',
   -description    => 'A test gene for GMOD meeting',
   -source         => 'GMOD 2007 Demo‘

);

$gene_feature->add_synonym( 'mulder' ); $gene_feature->add_synonym( 'scully' );

  1. inserts object into database

$gene_feature->insert(); print 'Inserted gene with feature_id:'.$gene_feature->feature_id()."\n"; </perl>

Create mRNA BioPerl Object
          exon_1:                                                    exon_2: 
              start: 13691                                             start: 14687
              end: 13767                                              end: 14720
              strand: 1                                                  strand: 1
              srcFeature_id: Id of genomic sample        srcFeature_id: Id of genomic sample

<perl>

  1. First, create exon features (using Bioperl)

my $exon_1 = new Bio::SeqFeature::Gene::Exon (

  -start     => 13691,
  -end       => 13767,
  -strand    => 1,
  -is_coding => 1

);

my $exon_2 = new Bio::SeqFeature::Gene::Exon (

  -start     => 14687,
  -end       => 14720,
  -strand    => 1,
  -is_coding => 1

);

  1. Next, create transcript feature to 'hold' exons (using Bioperl)

my $bioperl_mrna = new Bio::SeqFeature::Gene::Transcript();

  1. Add exons to transcript (using Bioperl)

$bioperl_mrna->add_exon( $exon_1 ); $bioperl_mrna->add_exon( $exon_2 ); </perl>

Create and Insert mRNA

The BioPerl object holds the location information, but now we want to create a Modware object and link it to the gene as well as locate it on the chromosome.

<perl>

    # Now create Modware Feature to 'hold' bioperl object
    my $mrna_feature = new Modware::Feature(
        -type              => 'mRNA',
        -bioperl           => $bioperl_mrna,
        -source            => 'GMOD 2007 Demo',
        -reference_feature => $reference_feature
    );
    # Associate mRNA to gene (required for insertion)
    $mrna_feature->gene( $gene_feature );
   # inserts object into database
    $mrna_feature->insert();

</perl>

Writing the Report

2) Retrieve and print the following report for gene xfile

   symbol: xfile
   synonyms: mulder, scully
   description: A test gene for GMOD meeting
   type: gene
   exon1 start: 13691
   exon1 end: 13767
   exon2 start: 14687
   exon2 end: 14720
   >xfile cds
   ATGGCGTTAGTATTCATGGTTACTGGTTTCGCTACTGATATCACCCAGCGTGTAGGCTGT
   GGAATCGAACACTGGTATTGTATAAATGTTTGTGAATACACTGAGAAATAA


<perl>

       use Modware::Gene;
       use GMODWriter;
       my $xfile_gene = new Modware::Gene( -name => 'xfile' );
       GMODWriter->Write_gene_report( $xfile_gene );

</perl>

Writing the Report

2) Retrieve and print the following report for gene xfile

<perl> package GMODWriter; sub Write_gene_report { my ($self, $gene) = @_; my $symbol = $gene->name();

my @synonyms = @{ $gene->synonyms() }; my $syn_string = join ",", @synonyms; my $description = $gene->description(); my $type = $gene->type();

  1. get features associated with the gene that are of type 'mRNA'

my ($mrna) = grep { $_->type() eq 'mRNA' } @{ $gene->features() };

  1. use bioperl method to get exons from mRNA

my @exons = $mrna->bioperl->exons_ordered();

  1. Modware will return a nice fasta file for you.

my $fasta = $mrna->sequence( -type => 'cds', -format => 'fasta' );

  1. Now print the actual report

print "symbol: $symbol\n"; print "synonyms: $syn_string\n"; print "description: $description\n"; print "type: $type\n";

my $count = 0; foreach my $exon (@exons ) {

  $count++;
  print "exon${count} start: ".$exon->start()."\n";
  print "exon${count} end: ".$exon->end()."\n";
 }
 print "$fasta";

}

. . .

</perl>

Updating a Gene Name

Problem 3) Update the gene xfile: change the name symbol to x-file and retrieve the changed record. Regenerate gene report

<perl>

use Modware::Gene;
use Modware::DBH;
use GMODWriter;
eval{
   # get xfile gene
   my $xfile_gene = new Modware::Gene( -name => 'xfile' );
   # change the name
   $xfile_gene->name( 'x-file' );
    # write changes to database
   $xfile_gene->update();
   # we can use the original object if we want, but instead
   # we refetch from the database to 'prove' the name has been changed
   my $xfile_gene2 = new Modware::Gene( -name => 'x-file' );
   # use our GMODWriter package to write report for x-file
   GMODWriter->Write_gene_report( $xfile_gene2 );
};
if ($@){
   warn $@;
   new Modware::DBH->rollback();
}

</perl>


Search and Display Results

Problem 4) Search for all genes with symbols starting with "x-*". With the results produce the following simple result list (organism will vary):

   1323    x-file  Xenopus laevis
   1324    x-men   Xenopus laevis
   1325    x-ray   Xenopus laevis

<perl>

    use Modware::Gene;
    use Modware::DBH;
    use GMODWriter;
    # find genes starting with 'x-'
    my $results = Modware::Search::Gene->Search_by_name( 'x-*' );
    # write the search results
    GMODWriter->Write_search_results( $results )

</perl>


Search and Display Results

Problem 4) Search for all genes with symbols starting with "x-*". With the results produce the following simple result list (organism will vary):

   1323    x-file  Xenopus laevis
   1324    x-men   Xenopus laevis
   1325    x-ray   Xenopus laevis


<perl> sub Write_search_results {

 my ($self, $itr) = @_;
 # loop through iterator
 while (my $gene = $itr->next) {
   # print the requested information
   print $gene->feature_id . "\t" . $gene->name .
     "\t" . $gene->organism_name . "\n";
 }

} </perl>

Delete a Gene

Problem 5) Delete the gene x-ray. Run the search and report again.

   1323    x-file  Xenopus laevis
   1324    x-men   Xenopus laevis

<perl>

# get the xray gene
my $xray = new Modware::Gene( -name => 'x-ray' );
# set is_deleted = 1, this will 'hide' the gene from Searches
$xray->is_deleted(1);
# write change to database
$xray->update();
# find genes starting with 'x-'
my $results = Modware::Search::Gene->Search_by_name( 'x-*' );
# write the search results
GMODWriter->Write_search_results( $results )

</perl>


Other Modware Highlights
  • Easy to write applications with Modware
  • Extensible
  • Available through Sourceforge
  • Easy to install
  • Large unit test coverage
  • Current release 0.2-RC1
    • Works with GMOD’s latest release
  • Sample script demoed here are available
    • sample_scripts directory
Other Nice Things About Modware


Coming Attractions
  • Support for changing genomic sequence
  • ncRNAs
  • UTRs
  • Onotology modules
  • Phenotype Annotations
  • Send us your ideas!


Limitations
  • Does not have full flexibility of Chado
  • Not enough users to get quality feedback
  • Performance (?)
  • Language dependent


Acknowlegments
  • Rex Chisholm, PhD
  • Warren Kibbe, PhD
  • Scott Cain
  • Brian O’connor
  • Sohel Merchant
  • Petra Fey
  • Pascale Gaudet,
  • Karen Pilcher
  • BioPerl
  • GMOD
  • SGD

GBrowse (DasI) Adaptor

Background

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

  • Demonstrations of what your software does well

Limitations

Presentation by Scott Cain

Create the database
$ perl Makefile.PL
$ make
$ sudo make install
$ make load_schema
$ make prepdb             # now with Xenopus!
$ make ontologies        # load rel, SO, featureprop
Loading Data

Create some GFF from the specifications:

fake_chromosome example chromosome 1  15017 .  .  . ID=fake_chromosome;
                                                   Name=fake_chromosome
fake_chromosome example gene   13691  14720 .  +  . ID=xfile;Name=xfile;
                                                   Alias=mulder,scully;
                                                  Note=A test gene for GMOD meeting
fake_chromosome example mRNA   13691  14720 .  +  . ID=xfile_mRNA;
                                                   Parent=xfile
fake_chromosome example exon   13691  13767 .  +  . Parent=xfile_mRNA
fake_chromosome example exon   14687  14720 .  +  . Parent=xfile_mRNA
fake_chromosome example gene   12648  13136 .  +  . ID=x-men

And load it with the bulk loader:

$ gmod_bulk_load_gff3.pl -g sample.gff

...lots of output...

For kicks, set up GBrowse.

Adaptor components
  • Bio::DB::Das::Chado
    • Database connection object
  • Bio::DB::Das::Chado::Segment
    • Object for any range of DNA
  • Bio::DB::Das::Chado::Segment::Feature
Use Bio::DB::Das::Chado

<perl> use Bio::DB::Das::Chado;

my $chado = Bio::DB::Das::Chado->new(

     -dsn => "dbi:Pg:dbname=test",
     -user=> "scott",
     -pass=> "" ) || die "no new chado";

my $gene_name = 'xfile';

my ($gene_fo) = $chado->get_features_by_name($gene_name); </perl>

Use Some Accessors

<perl> print "symbol: " .$gene_fo->display_name."\n"; print "synonyms: " .join(', ',$gene_fo->synonyms)."\n"; print "description: " .$gene_fo->notes."\n"; print "type: " .$gene_fo->type."\n";

my ($mRNA) = $gene_fo->sub_SeqFeature(); my @exons = $mRNA->sub_SeqFeature();

for my $exon (@exons) {

   next unless ($exon->type->method eq 'exon');
   $exon_count++;
   print "exon$exon_count start: ".$exon->start."\n";
   print "exon$exon_count end: "  .$exon->end.  "\n";
   $cds_seq .= $exon->seq->seq; #the first seq

} #returns a Bio::Seq object </perl>

Bulk Output

<perl>

my $gene_name = 'x-*';
my @genes = $chado->get_features_by_name(
                -name => $gene_name,
                -class=> 'gene' );


for my $gene (@genes) {
    print join("\t",
               $gene->feature_id,
               $gene->display_name,
               $gene->organism),"\n";
}

</perl>

Advantages
  • Comes 'for free' with GBrowse
  • Uses 'familiar' BioPerl idioms, very similar to widely used Bio::DB::GFF (though with fewer methods)
Limitations
  • No ability to write
  • Incomplete implementation of Bio::DasI; just enough to make GBrowse work
  • As an aside: found two bugs while working on this presentation (now fixed in cvs).
  • Also, despite the name, has never been tested with a das server.
Conclusion
  • Not suitable as a 'general' middleware layer
  • However, it may be suitable for some applications, particularly if they are somehow similar to GBrowse or other uses of Bio::DB::GFF

iBatis

Background

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

  • Demonstrations of what your software does well

Limitations

Presentation by Jeff Bowes

Jeff Bowes, Xenbase, University of Calgary

ibatis and Abator
  • iBatis
    • Light-weight framework
    • Still based on SQL but eliminates the repetitive drudgery of JDBC
    • Shallow learning curve
  • Abator
    • Generates ibatis CRUD objects by introspecting database tables
Abator
<abatorConfiguration>
 <abatorContext>    
   <jdbcConnection driverClass="COM.ibm.db2.jdbc.app.DB2Driver"
       connectionURL="jdbc:db2:XBDV05"
       userId="db2inst1"
       password=“*******">
     <classPathEntry location="/Program Files/IBM/SQLLIB/java/db2java.zip" />
   </jdbcConnection>

   <javaModelGenerator
  targetPackage="org.gmod.architecture.framwork.bakeoff.abator.model"
  targetProject="gene" />
   <sqlMapGenerator
  targetPackage="org.gmod.architecture.framwork.bakeoff.abator.sql"
  targetProject="gene" />
   <daoGenerator type="IBATIS"
  targetPackage="org.gmod.architecture.framwork.bakeoff.abator.dao"
  targetProject="gene" />
<abatorConfiguration>
Abator
<abatorConfiguration>
 <abatorContext>    
   <jdbcConnection driverClass="COM.ibm.db2.jdbc.app.DB2Driver"
       connectionURL="jdbc:db2:XBDV05"
       userId="db2inst1"
       password=“*******">
     <classPathEntry location="/Program Files/IBM/SQLLIB/java/db2java.zip" />
   </jdbcConnection>

   <javaModelGenerator
  targetPackage="org.gmod.architecture.framwork.bakeoff.abator.model"
  targetProject="gene" />
   <sqlMapGenerator
  targetPackage="org.gmod.architecture.framwork.bakeoff.abator.sql"
  targetProject="gene" />
   <daoGenerator type="IBATIS"
  targetPackage="org.gmod.architecture.framwork.bakeoff.abator.dao"
  targetProject="gene" />
<abatorConfiguration>
Abator

<table schema="db2inst1" tableName="synonym">

     <generatedKey column="synonym_id" sqlStatement="VALUES PREVVAL FOR
            synonym_seq" identity="true" />
     <columnOverride column="CREATED_BY" jdbcType="INTEGER" />
     <columnOverride column="MODIFIED_BY" jdbcType="INTEGER" />

</table>

Abator

Works as:

  • Eclipse plug-in
  • ANT
  • Standalone
DAO Methods
  • Insert (Feature)
  • Update (Feature)
  • DeletebyKey (FeatureKey)
  • SelectbyKey (FeatureKey)
  • SelectbyExample (FeatureExample)
  • DeletebyExample (FeatureExample)
Insert
<insert id="abatorgenerated_insert" parameterClass=
 "org.gmod.architecture.framwork.bakeoff.abator.model.FeatureWithBLOBs">
   insert into db2inst1.feature
           (DBXREF_ID, ORGANISM_ID, NAME, UNIQUENAME,
            RESIDUES, SEQLEN, MD5CHECKSUM, TYPE_ID, IS_ANALYSIS,
            IS_OBSOLETE, CREATED_BY)
   values (#dbxrefId:INTEGER#, #organismId:INTEGER#, #name:VARCHAR#,
           #uniquename:VARCHAR#, #residues:CLOB#, #seqlen:INTEGER#,
           #md5checksum:CHAR#, #typeId:INTEGER#,
           #isAnalysis:SMALLINT#,  #isObsolete:SMALLINT#,
          #createdBy:INTEGER#)

   <selectKey resultClass="java.lang.Integer" keyProperty="featureId">
       VALUES PREVVAL FOR feature_seq
   </selectKey>
 </insert>
Insert
<insert id="abatorgenerated_insert" parameterClass=
 "org.gmod.architecture.framwork.bakeoff.abator.model.FeatureWithBLOBs">
   insert into db2inst1.feature
           (DBXREF_ID, ORGANISM_ID, NAME, UNIQUENAME,
            RESIDUES, SEQLEN, MD5CHECKSUM, TYPE_ID, IS_ANALYSIS,
            IS_OBSOLETE, CREATED_BY)
   values (#dbxrefId:INTEGER#, #organismId:INTEGER#, #name:VARCHAR#,
           #uniquename:VARCHAR#, #residues:CLOB#, #seqlen:INTEGER#,
           #md5checksum:CHAR#, #typeId:INTEGER#,
           #isAnalysis:SMALLINT#,  #isObsolete:SMALLINT#,
                      #createdBy:INTEGER#)

   <selectKey resultClass="java.lang.Integer" keyProperty="featureId">
       VALUES PREVVAL FOR feature_seq
   </selectKey>
 </insert>
Insert
<insert id="abatorgenerated_insert" parameterClass=
 "org.gmod.architecture.framwork.bakeoff.abator.model.FeatureWithBLOBs">
   insert into db2inst1.feature
           (DBXREF_ID, ORGANISM_ID, NAME, UNIQUENAME,
            RESIDUES, SEQLEN, MD5CHECKSUM, TYPE_ID, IS_ANALYSIS,
            IS_OBSOLETE, CREATED_BY)
   values (#dbxrefId:INTEGER#, #organismId:INTEGER#, #name:VARCHAR#,
           #uniquename:VARCHAR#, #residues:CLOB#, #seqlen:INTEGER#,
           #md5checksum:CHAR#, #typeId:INTEGER#,
           #isAnalysis:SMALLINT#,  #isObsolete:SMALLINT#,
          #createdBy:INTEGER#)

   <selectKey resultClass="java.lang.Integer" keyProperty="featureId">
       VALUES PREVVAL FOR feature_seq
   </selectKey>
 </insert>
Insert
<selectKey resultClass="java.lang.Integer"
           keyProperty="featureId">
       VALUES PREVVAL FOR feature_seq
</selectKey>
Insert
<selectKey resultClass="java.lang.Integer"
           keyProperty="featureId">
       VALUES PREVVAL FOR feature_seq
</selectKey>
Insert

<java>

try {
 sqlMap.startTransaction();
 pGene.id =featureDAO.insert(pGene.getFeatureWithBLOBs());
 featurepropDAO.insert(pGene.getPropertyDescription());
 pGene.featurelocId = featurelocDAO.insert(pGene
                                  .getFeaturelocWithBLOBS());
 pGene = insertExons(pGene);
 insertSynonyms(pGene);
 sqlMap.commitTransaction();
} catch (Exception e) {
       System.out.println(e);
       throw (e);
} finally {
      sqlMap.endTransaction();
}

</java>

Insert

<java>

try {
 sqlMap.startTransaction();
 pGene.id =featureDAO.insert(pGene.getFeatureWithBLOBs());
 featurepropDAO.insert(pGene.getPropertyDescription());
 pGene.featurelocId = featurelocDAO.insert(pGene
                                  .getFeaturelocWithBLOBS());
 pGene = insertExons(pGene);
 insertSynonyms(pGene);
 sqlMap.commitTransaction();
} catch (Exception e) {
       System.out.println(e);
       throw (e);
} finally {
      sqlMap.endTransaction();
}

</java>

Transactions
  • SQLMap
  • JDBC
  • JTA - Java Transaction API
    • 2-Phase commit
  • Hibernate
  • External (Customized)
Retrieval
symbol: xfile
description: A test gene for GMOD meeting
mRNA Feature
        exon_1: start: 13691 end: 13767 
                        strand: 1
                        srcFeature_id: Id of genomic sample
        exon_2:      start: 14687 end: 14720
                         strand: 1
                         srcFeature_id: Id of genomic sample
Master Detail Reports
<resultMap id="SelectGeneResults"
  class="org.gmod.architecture.framwork.bakeoff.Gene" groupBy="id">
     <result column="FEATURE_ID" property="id" jdbcType="INTEGER"/>
     <result column="GENE_NAME" property="name" jdbcType="VARCHAR" />
     <result column="DESCRIPTION" property="description“
  jdbcType="VARCHAR" />
     <result column="TYPE_ID" property="typeId" jdbcType="INTEGER" />
     <result property="exons" resultMap = "gene.SelectExonResults"/>
 </resultMap>

<resultMap id="SelectExonResults"
  class="org.gmod.architecture.framwork.bakeoff.Exon">
    <result column="EXON_ID" property="id" jdbcType="INTEGER"/>
    <result column="EXON_NAME" property="name" jdbcType="VARCHAR" />
    <result column="EXON_RESIDUES" property="residues" jdbcType="CLOB" />
    <result column="STRAND" property="strand" jdbcType="INTEGER" />
    <result column="FMIN" property="fmin" jdbcType="INTEGER" />
    <result column="FMAX" property="fmax" jdbcType="INTEGER" />
    <result column="SRCFEATURE_ID" property="sourceFeatureId"
  jdbcType="INTEGER" />
</resultMap>
Master Detail Report
<resultMap id="SelectGeneResults"
  class="org.gmod.architecture.framwork.bakeoff.Gene" groupBy="id">
     <result column="FEATURE_ID" property="id" jdbcType="INTEGER"/>
     <result column="GENE_NAME" property="name" jdbcType="VARCHAR" />
     <result column="DESCRIPTION" property="description“
  jdbcType="VARCHAR" />
     <result column="TYPE_ID" property="typeId" jdbcType="INTEGER" />
     <result property="exons" resultMap = "gene.SelectExonResults"/>
 </resultMap>

<resultMap id="SelectExonResults"
  class="org.gmod.architecture.framwork.bakeoff.Exon">
    <result column="EXON_ID" property="id" jdbcType="INTEGER"/>
    <result column="EXON_NAME" property="name" jdbcType="VARCHAR" />
    <result column="EXON_RESIDUES" property="residues" jdbcType="CLOB" />
    <result column="STRAND" property="strand" jdbcType="INTEGER" />
    <result column="FMIN" property="fmin" jdbcType="INTEGER" />
    <result column="FMAX" property="fmax" jdbcType="INTEGER" />
    <result column="SRCFEATURE_ID" property="sourceFeatureId"
  jdbcType="INTEGER" />
</resultMap>
Master Detail Report
gene_id  Symbol   Type      Fmin   Fmax
6129482  x-files  gene      13691   13767
6129482  x-files  gene      14687   14720

Becomes:

gene_id  Symbol   Type      Fmin   Fmax
6129482  x-files  gene      13691   13767
                                          14687   14720
Dynamic Queries=
  • Gene Name (Description)
    • Feature, Featureprop
  • Symbol
    • Feature
  • Feature Synonyms
    • Feature, Feature_Synonym, Synonym
  • Ortholog Synonyms
    • Feature, Feature_relationship, Feature, Feature Synonyms
Dynamic Queries
FROM
    CAT_X_GENE_V gc
 <isEqual
prepend=",property="searchSymbol"
             compareValue="true">
    GENE_SYMBOLS s
 </isEqual>
 <isEqual prepend=","
property="searchNcbi"    
compareValue="true">
    NCBI_GI n
 </isEqual>
Dynamic Queries
<dynamic prepend="WHERE">
 <isEqual prepend="AND" property="searchNameOnly“
               compareValue="true">
  <iterate property="searchTokens" conjunction="AND"  
                   open=" (" close=") ">
         LOWER(VARCHAR(gc.longname)) LIKE             
        LOWER(CAST(#searchTokens[]:VARCHAR# AS VARCHAR(512)))
  </iterate>
</isEqual>

Iterate very useful for multiple search terms

Miscellaneous Features
  • Supports various data sources
    • Simple JDBC
    • DBCP – Apache Connection Pooling
    • JNDI – Java Naming Directory Interface
  • Very flexible
  • Local caching of results
    • Lazy loading
Support
  • In GMOD used by
    • Xenbase, Artemis at Sanger
  • Many other users:
    • e.g. MySpace.com
  • Top level Apache Project
    • www.ibatis.apache.org
  • Active community
iBatis Cons
  • Does not hide SQL
  • Does not create a whole object model of the db in memory
  • Not as widely used as Hibernate
  • No Perl version
iBatis Pros
  • Does not hide SQL
    • No new query language to learn
  • Separates and groups SQL
  • Simple!!
    • Light wrapper - No real tweaks
  • Does the job well
  • Excellent support for Master Detail and Dynamic queries.
Acknowledgements

GMOD

  • Eric Just
  • Everyone else

Ibatis Developers

  • Kevin Snyder,
  • Chris Jarabek,
  • Ross Gibb

PI

  • Peter Vize

Financial Support

  • Alberta Heritage Foundation for Medical Research
  • Alberta Network for Proteomics Innovation
  • University of Calgary, Faculty of Science
  • University of Calgary Dept. of Computer Science
  • NICHD

Hibernate

Background

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

  • Demonstrations of what your software does well

Limitations

Presentation by Robert Bruggner

Chado API via Java & Hibernate Robert Bruggner, VectorBase.org

Overview
  • Background
  • Quick Hibernate Overview
  • Hibernate Connectivity and O/R Mapping Example
  • GMOD Demo
Background
  • VectorBase
    • A bioinformatic resource center for invertebrate vectors of human pathogens
  • Responsible for storage and display of multiple organisms’ genomes
    • Anopheles gambiae, Aedes aegypti, Ixodes scapularis, Culex pipiens and so on....
  • Want to store data for many organisms- Chado a natural choice
  • Ensembl Genome Browser already used for A. gambiae
    • Wrote Ensembl API Database adaptor for Chado... Not maintainable.
  • Use Both Databases
    • Transfer genomic data from Ensembl to Chado
    • Search Engine and Indexer
    • Run DAS
    • Export data via ChadoXML and GFF3
  • Need API for Database I/O
Hibernate Background
  • Hibernate
    • They say: “A powerful, high performance object/relational persistence and query service.”
  • Automates the persistence of plain old Java objects (POJO)
    • User maps their POJO properties to database tables via XML (HBM File).
    • Persist a specific object by storing it the database.
  • Intelligent Database I/O
    • Smart detection of “Dirty Properties” when performing Save / Update / Delete.
    • Cascadable Save / Update / Delete for complex objects.
Hibernate Database Connectivity
  • Configure Hibernate in hibernate.cfg.xml
  • Define a Data Source
    • We use a simple, single JDBC connection Chado
    • Can be configured to use a connection pool or data source accessible by the Java Naming and Directory Interface (JNDI).
    • Define a connection “dialect”
    • org.hibernate.dialect.PostgreSQLDialect
  • Describe the relationship between Java objects and database tables
    • Use XML to describe where to store POJO property data in the database
  • Create a new Hibernate Session based on the configuration
  • Begin a transaction to start performing work
POJO and HBM Example file - CV

<java>

public class CV {

   private int cv_id
   private String name;
   private String definition;
   public property gettersandsetters() {
           ....
   }
 
   public boolean equals(CV comparaCV) {
        ....
   }
   public int hashCode(){
        ...
   }
}

</java>

<hibernate-mapping>
 
   <class name="org.vectorbase.chadoAPI.chadoObjects.CV" table="cv">

   <id name="cv_id" column="cv_id" unsaved-value="undefined">

   <generator class="sequence">

   <param name="sequence">cv_cv_id_seq</param>

   </generator>

   </id>	

   <property name="name" column=”name” type="java.lang.String” not-null="true"/>

   <property name="definition" column=”definition” type="java.lang.String”/>

   </class>
</hibernate-mapping>
HBM Example CVTerm

<java>

public class CVTerm {

   private int cvterm_id;

   private CV cv;

   private String name;

   private String definition;

   private DBXref dbxref;

   private int is_obsolete;

   private int is_relationshiptype;
  
  .....

</java>

<hibernate-mapping>

   <class name="org.vectorbase.chadoAPI.chadoObjects.CVTerm" table="cvterm">

   <id name="cvterm_id" column="cvterm_id" unsaved-value="undefined">

   <generator class="sequence">

   <param name="sequence">cvterm_cvterm_id_seq</param>

   </generator>

   </id>

   <many-to-one name="cv" class="org.vectorbase.chadoAPI.chadoObjects.CV" column="cv_id" 
           not-null="true" cascade="save-update"/>

   <property name="name" not-null="true" type="java.lang.String"/>

   <property name="definition"/>

   <one-to-one name="dbxref" class="org.vectorbase.chadoAPI.chadoObjects.DBXref" cascade="all"/>

   <property name="is_obsolete"/>

   <property name="is_relationshiptype"/>

   </class>
 </hibernate-mapping>
</code>
Hibernate Object Retrieve

<java>

import org.hibernate.Session;
import org.vectorbase.chadoAPI.CVTerm;
import org.vectorbase.chadoAPI.CV;

// Load the configuration from hibernate.cfg.xml

// Build a session factory first (not shown)

// Get the session based on the configuration and begin transaction
Session session = HibernateSessionFactory.getCurrentSession();
session.beginTransaction();

// Load a CVTerm by its ID
CVTerm cvt = (CVTerm) session.get(CVTerm.class,1);

// Load a CVTerm using HQL
CVTerm cvt = session.createQuery(“from CVTerm where name=?”).setString(0,”name”).uniqueResult();

// Print out the name of the cvterm
System.out.println(cvt.getName());

// Get the cv that the cvterm is associated with
// Hibernate doesn’t return the cv_id - it returns a CV Object.
CV cv = cvt.getCv();

// Print out the cv’s name
System.out.println(cv.getName());

</java>

Hibernate Object Update

<java>

import org.hibernate.Session;
import org.vectorbase.chadoAPI.CVTerm;

// Load the configuration from hibernate.cfg.xml

// Build a session factory first (not shown)

// Get the session based on the configuration and begin transaction
Session session = HibernateSessionFactory.getCurrentSession();
session.beginTransaction();

// Load a CVTerm by its ID
CVTerm cvt = (CVTerm) session.get(CVTerm.class,1);

// Change cvt’s name
cvt.setName(“New CVTerm name”);

// Save!
// Generated SQL updates “Dirty” properties (name, in this case)
session.save(cvt);

// Commit data to database
session.commit();

</java>

Hibernate Save

<java>

import org.hibernate.Session;
import org.vectorbase.chadoAPI.CVTerm;
import org.vectorbase.chadoAPI.CV;

// Load the configuration from hibernate.cfg.xml
// Build a session factory first and get begin transaction (not shown)

// Make a new CV
CV new_cv = new CV();
new_cv.setName(“New CV”);
new_cv.setDefinition(“New CV Def”);

// Make a new cvterm for that cv
CVTerm new_cvterm = new CVTerm();
new_cvterm.setName(“New CVTerm Name”);
// ..... save dbxref etc......

// Add that CVTerm to our new CV
new_cv.addCVTerm(new_cvterm);

// Save the new data...
// Hibernate recognizes that it has to first save new_cv, then save new_cvterm.
session.save(new_cvterm);

session.commit();

// You can see the new id’s assigned by the database
System.out.println(new_cv.getCv_id());
System.out.println(new_cvterm.getCvterm_id());

</java>

Inheritance
<hibernate-mapping>

    <class name="org.vectorbase.chadoAPI.chadoObjects.Feature" table="feature" discriminator-
 value="not null">

    <id name="feature_id" column="feature_id" unsaved-value="undefined">

   <generator class="sequence">

   <param name="sequence">feature_feature_id_seq</param>

   </generator>

    </id>	

     <discriminator column="type_id" type="integer" insert="false"/>

    <many-to-one name="dbxref" class="org.vectorbase.chadoAPI.chadoObjects.DBXref" 
                 column="dbxref_id" cascade="all"/>

    <many-to-one name="organism" class="org.vectorbase.chadoAPI.chadoObjects.Organism" 
                 column="organism_id" not-null="true" cascade="save-update"/>

    <property name="name"/>
                 .....

<hibernate-mapping>	

    <subclass name="org.vectorbase.chadoAPI.chadoFeatures.Gene" 
         extends="org.vectorbase.chadoAPI.chadoObjects.Feature" discriminator-value="767">

    </subclass>
</hibernate-mapping>

Write custom methods for specific sub-classes

ChadoAPI
  • POJO Mappings
    • CV, CVTerm, DB, DBXref, Feature, FeatureCVTerm, FeatureDBXref, FeatureLoc, FeatureProp, FeatureRelationship, FeatureSynonym, Organism, Pub, Synonym
  • Extended Features
    • Chromosome, Gene, Transcript, Exon, Protein
  • Constants
    • CVTerms, FeatureFeatureRelationships, Ontologies
  • Special
    • ChadoAdapter
GMOD Example

<java>

// Set up our session and begin transaction
Session session = HibernateUtil.getSessionFactory().getCurrentSession();
session.beginTransaction();
                                                          
// Make a chado adpator and load up some utility objects
ChadoAdaptor ca = new ChadoAdaptor();
Chromosome c = ca.fetchChromosomeByUniqueName("fake_chromosome");
Pub null_pub = ca.fetchPubByPubID(1);
Organism agambiae = ca.fetchOrganismByScientificName("Anopheles","gambiae");
                                                          
// Begin GMOD Demo Code
                                                          
// Make our new gene;
Gene xfile = new Gene();
xfile.setOrganism(agambiae);
xfile.setUniquename("xfile");
xfile.setDescription("A test gene for GMOD meeting");
                                                          
/* Set the location of our gene. No need to set coordinates because they'll be updated
  * based on the exon boundaries. 
  */
FeatureLoc xfile_loc = new FeatureLoc();
xfile_loc.setSrcfeature(c);
xfile_loc.setStrand(1);
xfile.setFeatureLoc(xfile_loc);

// Add synonyms to xfile
xfile.createNewFeatureSynonym("mulder", null_pub, CVTerms.EXACT_SYNONYM);
xfile.createNewFeatureSynonym("scully", null_pub, CVTerms.EXACT_SYNONYM);

</java>

GMOD Example

<java>

// Create a new transcript for our gene.
Transcript t = xfile.createGeneTranscript("xfile-RA");

// Create some exons for that transcript.
t.createTranscriptExon("xfile:1", 13691, 13767);
t.createTranscriptExon("xfile:2", 14687, 14720);

// Save our new gene
session.save(xfile);
System.out.println("xfile feature_id is " + xfile.getFeature_id());

// Fetch our saved gene from the database
Gene xfile_r = ca.fetchGeneByUniqueName("xfile");
System.out.println("symbol: " + xfile_r.getUniquename());
System.out.print("synonyms: ");
for (FeatureSynonym fs : xfile_r.getFeatureSynonyms()){
        System.out.print(fs.getSynonym().getName() + " ");
}

System.out.println("description: " + xfile_r.getDescription());
System.out.println("type: " + xfile_r.getType().getName());

for (Transcript tx : xfile_r.fetchAllTranscripts()){
    for (Exon e : tx.fetchAllExons()){
         System.out.println(e.getUniquename() + " Start:\t" + e.getFeatureLoc().getFmin());
         System.out.println(e.getUniquename() + " End:\t" + e.getFeatureLoc().getFmax());
         System.out.println("\tSrcFeatureID: " + e.getFeatureLoc().getSrcfeature().getFeature_id());
    }
    System.out.println(">" + tx.getUniquename());
    System.out.println(tx.generateTranscriptSequenceFromExons().toUpperCase());
}

</java>

GMOD Update & Delete

<java>

// Lets update our name...
xfile_r.setUniquename("x-file");

session.save(xfile_r);

// Not part of the ChadoAdaptor utility object, but a good example of HQL
List<Gene> genes = (List<Gene>)session.createQuery("from Gene where uniquename like ?").setString(0,”x-%”).list();

for (Gene g : genes){

   System.out.println(g.getFeature_id() + 
                         "\t" + g.getUniquename() + 
                         "\t" + g.getOrganism().getGenus() +
                         " " + g.getOrganism().getSpecies());
}

// Deleting... hmm...
Gene delete_me = ca.fetchGeneByUniqueName("x-ray");
session.delete(delete_me);

// All Finished
session.getTransaction().commit();

</java>

To Do...
  • Completeness
  • Exception Handling
  • Performance Tuning
Acknowledgements
  • VectorBase People
    • Frank Collins, EO Stinson, Ryan Butler
  • GMOD
  • NIAID

PSU Chado Interface

Background

  • Source:
  • Language: Java
  • Authors: Chinmay Patel, Adrian Tivey
  • Users:
  • Support:
  • Third party code:

Technical Overview

  • Database connectivity:
  • Transaction support:
  • Code generation:

Special topics

  • Demonstrations of what your software does well

Limitations

Presentation by Chinmay Patel

GeneDB

  • GeneDB is the organism data resource and annotation database for the Pathogen Sequencing Unit (PSU) at the Sanger Institute, UK
  • Contains 37 organisms, which is expected to grow to 62
  • Currently migrating to chado schema
  • Java API with two engines Hibernate & iBatis
Technical - Connections

Connections are configured in the Spring configuration file <xml>

       <bean id="dataSource" class="org.apache.commons.dbcp.BasicDataSource">
              <property name="driverClassName" value="org.postgresql.Driver" />
          <property name="url" value="jdbc:postgresql://holly.sanger.ac.uk:5432/chado" />
              <property name="username" value="DELIBERATELY_BOGUS_NAME"/>
              <property name="password" value="WIBBLE" />
    </bean>

</xml>

  • Uses a connection pool
  • Connection to the database is specified graphically, so the iBatis configuration file has variables for the location:

<xml> <property name="JDBC.Driver" value="org.postgresql.Driver"/>

<property name="JDBC.ConnectionURL” value="jdbc:postgresql://${chado}"/>

<property name="JDBC.Username" value="${username}"/>

<property name="JDBC.Password" value="${password}"/> </xml>

  • provide database location, username & password
  • select from scrollable list of feature with residues (organisms in separate Postgres schemas) what to open in Artemis
Technical - Code Generation
  • The shared interface and hibernate implementation were originally generated
  • There’s no explicit code generation (although the Spring and Hibernate runtimes may use them behinds the scenes)
Technical - Transactions
  • Transactions are fully supported
  • There’s no explicit code generation (although the Spring and Hibernate runtimes may use them behind the scenes)
Demo – Sample Problem

Creating a gene <java> genes[0] = new Feature(ORG, GENE, "xfile", false, false, now, now);

genes[0].setSeqLen(1029); sequenceDao.persist(genes[0]);

FeatureLoc loc = new FeatureLoc(SOURCE_FEATURE, genes[0], 13691, false, 14720, false, (short)1, 0, 0 ,0);

sequenceDao.persist(loc);

addFeatureProp(genes[0], "description", "A test gene for GMOD meeting");

addSynonymsToFeature(genes[0], "mulder", "scully");

createExon("exon1", genes[0], 13691, 13767, now, 0);

createExon("exon2", genes[0], 14687, 14720, now, 1); </java>

Retrieve a gene <java> Feature f = sequenceDao.getFeatureByUniqueName("xfile"); displayGene(f); </java>

Update a gene <java> genes[0].setUniqueName("x-file");

sequenceDao.merge(genes[0]); </java>

Demo – Sample Problem

<java> private Feature createExon(String name, Feature gene, int min, int max, Timestamp now, int rank) {

       Feature exon = new Feature(ORG, EXON, name, false, false, now, now);
       exon.setSeqLen(max-min);
       sequenceDao.persist(exon);
       FeatureLoc loc = new FeatureLoc(SOURCE_FEATURE, exon, min, false, max, false,       
                                     (short)1, 0, 0 ,0);
       sequenceDao.persist(loc);
       return exon;

} </java>

Demo – Sample Problem

<xml> <st:section name="Naming" id="gene_naming" collapsed="false" collapsible="false" hideIfEmpty="true">

symbol:
${feature.uniqueName}
 <db:synonym name="synonym" var="name" collection="${feature.featureSynonyms}">
   
Synonym: <db:list-string collection="${name}" /> </db:synonym>
Type:
${feature.cvTerm.name}

<st:section name="Exons" collapsed="false" collapsible="true" hideIfEmpty="true">

           <display:table name="exons" uid="tmp" pagesize="30" class="simple" cellspacing="0"

cellpadding="4">

                <display:column property="uniqueName" title="Exon"/>
                <display:column property="featureLocsForSrcFeatureId.fmin" title="Start"/>
                <display:column property="featureLocsForSrcFeatureId.fmax" title="end"/>
           </display:table>

</st:section>

<st:section name="cds" collapsible="true">

           ${feature.residues}

</st:section> </xml>

Conclusions