<!-- BEGIN s5 -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>dbic_chado</title>
<meta name="generator" content="Spork-S5" />
<meta name="version" content="Spork-S5 0.04" />
<meta name="author" content="Robert Buels" />
<link rel="stylesheet" href="ui/slides.css" type="text/css" media="projection" id="slideProj" />
<link rel="stylesheet" href="ui/opera.css" type="text/css" media="projection" id="operaFix" />
<link rel="stylesheet" href="ui/print.css" type="text/css" media="print" id="slidePrint" />
<link rel="stylesheet" type="text/css" href="" />
<script src="ui/slides.js" type="text/javascript"></script>
</head>
<body>
<div class="layout">
<div id="currentSlide"></div>
<div id="header"></div>
<div id="footer">
<h2>Robert Buels</h2>
<h2>rmb32@cornell.edu</h2>
<div id="controls"></div>
</div>
</div>
<div class="slide">
<h1>Bio::Chado::Schema</h1>
<h2>San Diego, CA</h2>
<h3>January 13-14, 2010</h3>
</div>
<!-- BEGIN slide -->
<div class="slide">
<h1>What is DBIx::Class?</h1>
<ul>
<li>Object-relational mapping framework for Perl</li>
<li>is now the de-facto standard</li>
<li>nice features</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>What is DBIx::Class?</h1>
<ul>
<li>query building (the magic of chainable ResultSets)</li>
<li>cross-database deployment (using <tt>SQL::Translator</tt> in the backend)</li>
<li>lots and lots of plugins available (dates, testing, ...)</li>
</ul>
<p>
</p>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Data Model</h1>
<ul>
<li>a set of classes</li>
<li>encapsulate the underlying storage</li>
<li>providing a nicer, maintainable API for working with the data</li>
</ul>
<p>
This isn't really middleware, it's a code library.
</p>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>And therefore, Bio::Chado::Schema</h1>
<h2>Approx. 180 Perl classes</h2>
<ul>
<li>every table and view, every module</li>
<li>Chado docs mirrored in the POD</li>
</ul>
<p>
</p>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>And therefore, Bio::Chado::Schema</h1>
<h2>Approx. 180 Perl classes</h2>
<ul>
<li>every table and view, every module</li>
<li>Chado docs mirrored in the POD</li>
</ul>
<p>
</p>
<pre class="formatter_pre">( So why do we need this? )
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Chado needs encapsulation</h1>
<h2>Chado's design makes for:</h2>
<ul>
<li>complex queries</li>
<li>steep learning curve</li>
<li>hard to get good performance</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Chado needs encapsulation</h1>
<h2>complex queries</h2>
<ul>
<li>generating</li>
<li>storing</li>
<li>automating</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Chado needs encapsulation</h1>
<h2>steep learning curve</h2>
<ul>
<li>codifying best practices</li>
<li>unified, high-level documentation</li>
<li>can help with database administration</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Chado needs encapsulation</h1>
<h2>performance</h2>
<ul>
<li>encapsulation => performance optimizations where needed</li>
<li>first step for more data management
<ul>
<li>creative indexing</li>
<li>materialized views (with their own classes)</li>
</ul>
</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<ul>
<li>a subset of those (another ResultSet)</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<ul>
<li>a subset of those (another ResultSet)</li>
</ul>
<pre class="formatter_pre">my $other_feature = $all_features->search({ name => 'something' });
</pre>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<ul>
<li>a subset of those (another ResultSet)</li>
</ul>
<pre class="formatter_pre">my $other_feature = $all_features->search({ name => 'something' });
</pre>
<ul>
<li>get an actual data object</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<ul>
<li>a subset of those (another ResultSet)</li>
</ul>
<pre class="formatter_pre">my $other_feature = $all_features->search({ name => 'something' });
</pre>
<ul>
<li>get an actual data object</li>
</ul>
<pre class="formatter_pre">my $some_feature = $all_features->find( 232432 );
$other_feature->first;
# or search in list context returns all resulting rows (careful!)
my @other_features = $all_features->search({ name => 'something' });
</pre>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<ul>
<li>a subset of those (another ResultSet)</li>
</ul>
<pre class="formatter_pre">my $other_feature = $all_features->search({ name => 'something' });
</pre>
<ul>
<li>get an actual data object</li>
</ul>
<pre class="formatter_pre">my $some_feature = $all_features->find( 232432 );
$other_feature->first;
# or search in list context returns all resulting rows (careful!)
my @other_features = $all_features->search({ name => 'something' });
</pre>
<ul>
<li>get its type cvterm</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage</h1>
<ul>
<li>open the schema. ( actual DB connection is deferred )</li>
</ul>
<pre class="formatter_pre">my $chado = Bio::Chado::Schema->connect( 'dbi:Pg:...', $user, $pass );
</pre>
<ul>
<li>get a ResultSet object representing a set of Rows</li>
</ul>
<pre class="formatter_pre">my $all_features = $chado->resultset('Sequence::Feature');
</pre>
<ul>
<li>a subset of those (another ResultSet)</li>
</ul>
<pre class="formatter_pre">my $other_feature = $all_features->search({ name => 'something' });
</pre>
<ul>
<li>get an actual data object</li>
</ul>
<pre class="formatter_pre">my $some_feature = $all_features->find( 232432 );
$other_feature->first;
# or search in list context returns all resulting rows (careful!)
my @other_features = $all_features->search({ name => 'something' });
</pre>
<ul>
<li>get its type cvterm</li>
</ul>
<pre class="formatter_pre">say $some_feature->type->name;
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage: Joined Select</h1>
<pre class="formatter_pre"># get features via the potato organism, also joining in the cvterms table
my $potato_bacs =
$chado->resultset('Organism::Organism')
->search({ species => 'Solanum tuberosum' })
->search_related( 'features',
{ 'type.name' => 'BAC_clone'},
{ 'join' => 'type' },
);
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage: Joined Select</h1>
<pre class="formatter_pre"># the equivalent bare SQL
my $potato_bacs = $dbh->selectall_arrayref( &lt;&lt;EOS, undef, 'Solanum tuberosum', 'BAC_clone');
SELECT features.feature_id
, features.dbxref_id
, features.organism_id
, features.name
, features.uniquename
, features.residues
, features.seqlen
, features.md5checksum
, features.type_id
, features.is_analysis
, features.is_obsolete
, features.timeaccessioned
, features.timelastmodified
FROM organism me
LEFT JOIN feature features
ON features.organism_id = me.organism_id
JOIN cvterm type
ON type.cvterm_id = features.type_id
WHERE type.name = 'BAC_clone' AND species = 'Solanum tuberosum'
EOS
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage: Loading</h1>
<pre class="formatter_pre">$chado->resultset( 'Cv::Cv' )
->find_or_create({ name => 'My Fake Ontology' })
->create_related( 'cvterm',
{ name => 'MyFakeTerm' });
</pre>
<p>
makes the SQL:
</p>
<pre class="formatter_pre">SELECT me.cv_id
, me.name
, me.definition
FROM cv me
WHERE ( me.name = 'My Fake Ontology' )
INSERT INTO cv ( name )
VALUES ( ? )
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>BCS Usage: Transactions</h1>
<pre class="formatter_pre">$chado->txn_do(sub {
$chado->resultset('Cv::Cv')
->find_or_create({ name => 'My Fake Ontology' })
->create_related( 'cvterm', { name => 'MyFakeTerm' } );
});
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>The Real Advantages of DBIC</h1>
<ul>
<li>easier to manipulate and assemble queries</li>
<li>Don't Repeat Yourself</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>The Real Advantages of DBIC</h1>
<ul>
<li>it's all objects. you can delegate to them, pass them around, etc.</li>
<li>HOWEVER:</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>The Real Advantages of DBIC</h1>
<ul>
<li>it's all objects. you can delegate to them, pass them around, etc.</li>
<li>HOWEVER:
<ul>
<li>usually you don't want to subclass them</li>
<li>but, see <tt>DBIx::Class::Manual::Cookbook</tt></li>
</ul>
</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>The Real Advantages of DBIC</h1>
<ul>
<li>complex joined queries (Chado queries) are very easy and compact</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>The Real Advantages of DBIC</h1>
<ul>
<li>SQL syntax errors are much more difficult to make</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Using DBIC with your own tables</h1>
<ul>
<li>use <tt>DBIx::Class::Schema::Loader</tt> to dump a whole set</li>
<li>make your own definitions</li>
</ul>
<ul>
<li>your table: other_thing, foreign key feature_id to Chado feature table</li>
</ul>
<pre class="formatter_pre">
package My::DBIC::Layer::OtherThing;
use base 'DBIx::Class::Core';
__PACKAGE__->table('other_thing');
__PACKAGE__->add_columns(
'other_thing_id' => { ... },
'name' => { ... },
'definition' => { ... },
'feature_id' => { ... },
);
__PACKAGE__->set_primary_key('other_thing_id');
__PACKAGE__->add_unique_constraint('ot_c1', ['name']);
__PACKAGE__->belongs_to(
'feature',
'Bio::Chado::Schema::Sequence::Feature',
{ 'foreign.feature_id' => 'self.feature_id' },
);
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>"Duct tape" BCS to your own schema</h1>
<ul>
<li>make an accessor 'other_things' that ties your own DBIC class to BCS</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>"Duct tape" BCS to your own schema</h1>
<ul>
<li>make an accessor 'other_things' that ties your own DBIC class to BCS</li>
</ul>
<pre class="formatter_pre">
Bio::Chado::Schema::Sequence::Feature->has_many(
'other_things',
'My::DBIC::Layer::OtherThing',
{ 'foreign.feature_id' => 'self.feature_id' },
);
</pre>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>"Duct tape" BCS to your own schema</h1>
<ul>
<li>make an accessor 'other_things' that ties your own DBIC class to BCS</li>
</ul>
<pre class="formatter_pre">
Bio::Chado::Schema::Sequence::Feature->has_many(
'other_things',
'My::DBIC::Layer::OtherThing',
{ 'foreign.feature_id' => 'self.feature_id' },
);
</pre>
<ul>
<li>add it to the BCS schema dynamically</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>"Duct tape" BCS to your own schema</h1>
<ul>
<li>make an accessor 'other_things' that ties your own DBIC class to BCS</li>
</ul>
<pre class="formatter_pre">
Bio::Chado::Schema::Sequence::Feature->has_many(
'other_things',
'My::DBIC::Layer::OtherThing',
{ 'foreign.feature_id' => 'self.feature_id' },
);
</pre>
<ul>
<li>add it to the BCS schema dynamically</li>
</ul>
<pre class="formatter_pre">Bio::Chado::Schema->register_source('OtherThing', 'My::DBIC::Layer::OtherThing');
</pre>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>"Duct tape" BCS to your own schema</h1>
<ul>
<li>make an accessor 'other_things' that ties your own DBIC class to BCS</li>
</ul>
<pre class="formatter_pre">
Bio::Chado::Schema::Sequence::Feature->has_many(
'other_things',
'My::DBIC::Layer::OtherThing',
{ 'foreign.feature_id' => 'self.feature_id' },
);
</pre>
<ul>
<li>add it to the BCS schema dynamically</li>
</ul>
<pre class="formatter_pre">Bio::Chado::Schema->register_source('OtherThing', 'My::DBIC::Layer::OtherThing');
</pre>
<ul>
<li>use it with the rest</li>
</ul>
<small>continued...</small>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>"Duct tape" BCS to your own schema</h1>
<ul>
<li>make an accessor 'other_things' that ties your own DBIC class to BCS</li>
</ul>
<pre class="formatter_pre">
Bio::Chado::Schema::Sequence::Feature->has_many(
'other_things',
'My::DBIC::Layer::OtherThing',
{ 'foreign.feature_id' => 'self.feature_id' },
);
</pre>
<ul>
<li>add it to the BCS schema dynamically</li>
</ul>
<pre class="formatter_pre">Bio::Chado::Schema->register_source('OtherThing', 'My::DBIC::Layer::OtherThing');
</pre>
<ul>
<li>use it with the rest</li>
</ul>
<pre class="formatter_pre">$chado->resultset('Sequence::Feature')->other_things;
</pre>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Making a composite schema</h1>
<pre class="formatter_pre">my $merged_schema_class =
Bio::Chado::Schema->merge( 'My::DBIC::Layer' );
$merged_schema_class->connect( ... );
$chado->resultset('Sequence::Feature')->other_things;
$chado->resultset('OtherThing')->find(...)->feature;
</pre>
<p>
Note: merge() is new in BCS 0.6, releasing soon.
</p>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h1>Further Work</h1>
<h2>Still need to add in some DBIx::Class relationships:</h2>
<ul>
<li>more many_to_many relationships (must be added manually)</li>
</ul>
<h2>More useful things are needed:</h2>
<ul>
<li>automate more common querying and loading patterns</li>
<li>compatibility with BioPerl data objects</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h2>Acknowledgments</h2>
<ul>
<li>Aure Bombarely (SGN)</li>
<li>Naama Menda (SGN)</li>
<li>Siddhartha Basu (dictybase)</li>
<li>Lukas Mueller (SGN)</li>
</ul>
</div>
<!-- END slide -->
<!-- BEGIN slide -->
<div class="slide">
<h2>That's All</h2>
<ul>
<li>The END</li>
</ul>
</div>
<!-- END slide -->
</body>
</html>
<!-- END s5 -->