package Analizo::Batch::Output::DB;
use strict;
use warnings;
use parent qw( Analizo::Batch::Output );
use DBI;
use Digest::SHA qw(sha1_hex);
sub database($) {
my ($self) = @_;
my $db = $self->file || 'output.sqlite3';
if ($db =~ /^dbi:/) {
return $db;
} else {
return 'dbi:SQLite:' . $db;
}
}
sub push($$) {
my ($self, $job) = @_;
my $project_id = $self->_add_project($job->project_name);
my $developer_id = $self->_add_developer($job);
my $commit_id = $self->_add_commit($job, $project_id, $developer_id);
$self->_add_modules($job, $commit_id, $project_id);
}
sub flush($) {
my ($self) = @_;
$self->{dbh}->disconnect();
}
sub _find_row_id($$@) {
my ($self, $sql, @data) = @_;
my $statement_id = 'st_find_' . sha1_hex($sql); # is this SHA1 needed at all?
$self->{$statement_id} ||= $self->{dbh}->prepare($sql);
my $list = $self->{dbh}->selectall_arrayref($self->{$statement_id}, {}, @data);
if (scalar(@$list) == 0) {
return undef;
} else {
return $list->[0]->[0];
}
}
sub _add_project($$) {
my ($self, $project) = @_;
$self->{st_add_project} ||= $self->{dbh}->prepare('INSERT INTO projects (name) values(?)');
my $project_id = $self->_find_project($project);
if (! $project_id) {
$self->{st_add_project}->execute($project);
$project_id = $self->_find_project($project);
}
return $project_id;
}
sub _find_project($$) {
my ($self, $project) = @_;
return $self->_find_row_id('SELECT id from projects where name = ?', $project);
}
sub _add_developer($$) {
my ($self, $job) = @_;
my $metadata = $job->metadata_hashref();
my $name = $metadata->{author_name};
my $email = $metadata->{author_email};
# FIXME unstested
if (!$name || !$email) {
return undef;
}
$self->{st_add_developer} ||= $self->{dbh}->prepare('INSERT INTO developers (name,email) VALUES (?,?)');
my $developer_id = $self->_find_developer($name, $email);
if (! $developer_id) {
$self->{st_add_developer}->execute($name, $email);
$developer_id = $self->_find_developer($name, $email);
}
return $developer_id;
}
sub _find_developer($$$) {
my ($self, $name, $email) = @_;
return $self->_find_row_id('SELECT id FROM developers WHERE name = ? AND email = ?', $name, $email);
}
sub _add_commit($$$$) {
my ($self, $job, $project_id, $developer_id) = @_;
unless ($self->_find_row_id('SELECT id FROM commits where id = ?', $job->id)) {
my $metadata = $job->metadata_hashref;
my $previous_commit_id = $metadata->{previous_commit_id};
my $date = $metadata->{author_date};
my ($summary, $details) = ({}, {});
if ($job->metrics) {
($summary, $details) = $job->metrics->data();
}
my ($metric_placeholders_sql, $metric_column_names_sql, @metric_values) = _metrics_sql($summary, _project_metric_columns());
$self->{st_insert_commit} ||= $self->{dbh}->prepare("INSERT INTO commits (id, project_id,developer_id,previous_commit_id,date,$metric_column_names_sql) VALUES(?,?,?,?,?,$metric_placeholders_sql)");
$self->{st_insert_commit}->execute($job->id, $project_id, $developer_id, $previous_commit_id, $date, @metric_values);
}
return $job->id;
}
sub _add_modules($$$$) {
my ($self, $job, $commit_id, $project_id) = @_;
my $metadata = $job->metadata_hashref();
my %module_versions = ();
if ($metadata->{files}) {
for my $file (keys(%{$metadata->{files}})) {
for my $module ($job->model->module_by_file($file)) {
unless($module_versions{$module}) {
my $module_id = $self->_add_module($module, $project_id);
my $module_files = $job->model->files($module);
my @file_ids = map { $metadata->{files}->{$_} } sort(@$module_files);
my $module_version_id = _module_version_id(@file_ids);
my $metrics = $job->metrics->metrics_for($module);
$self->_add_module_version($commit_id, $module_id, $module_version_id, %$metrics);
$module_versions{$module} = $module_version_id;
}
}
}
}
if ($metadata->{changed_files}) {
my %already_marked = ();
for my $file (keys(%{$metadata->{changed_files}})) {
for my $module ($job->model->module_by_file($file)) {
next if $already_marked{$module};
$already_marked{$module} = 1;
my $module_version_id = $module_versions{$module};
my $module_files = $job->model->files($module);
my @statuses = map { $metadata->{changed_files}->{$_} || 'K' } @$module_files;
my $statuses = join('', @statuses);
if ($statuses =~ /^A+$/) {
$self->_mark_as_added($commit_id, $module_version_id);
} elsif ($statuses !~ /^D+$/) {
$self->_mark_as_modified($commit_id, $module_version_id);
}
}
}
}
}
sub _add_module($$$) {
my ($self, $module, $project_id) = @_;
my $module_id = $self->_find_module($module, $project_id);
if (!$module_id) {
$self->{st_add_module} ||= $self->{dbh}->prepare('INSERT INTO modules (name, project_id) values (?,?)');
$self->{st_add_module}->execute($module, $project_id);
$module_id = $self->_find_module($module, $project_id);
}
return $module_id;
}
sub _find_module($$$) {
my ($self, $module, $project_id) = @_;
return $self->_find_row_id('SELECT id FROM modules WHERE name = ? AND project_id = ?', $module, $project_id);
}
sub _module_version_id {
my @file_ids = @_;
my $module_version_id;
if (scalar(@file_ids) == 1) {
$module_version_id = $file_ids[0];
} else {
$module_version_id = sha1_hex(join('', @file_ids));
}
return $module_version_id;
}
sub _add_module_version {
my ($self, $commit_id, $module_id, $module_version_id, %metrics) = @_;
my $module_version_already_exists = $self->_find_row_id('SELECT id from module_versions WHERE module_id = ? AND id = ?', $module_id, $module_version_id);
unless ($module_version_already_exists) {
my ($metric_placeholders_sql, $metric_column_names_sql, @metric_values) = _metrics_sql(\%metrics, _module_metric_columns());
my $statement = "INSERT INTO module_versions (id,module_id,$metric_column_names_sql) VALUES (?,?,$metric_placeholders_sql)";
$self->{st_add_module_version} ||= $self->{dbh}->prepare($statement);
$self->{st_add_module_version}->execute($module_version_id, $module_id, @metric_values);
}
$self->{st_link_commit_and_module_version} ||= $self->{dbh}->prepare('INSERT INTO commits_module_versions (commit_id,module_version_id) VALUES (?,?)');
$self->{st_link_commit_and_module_version}->execute($commit_id, $module_version_id);
return $module_version_id;
}
sub _mark_as_added($$$) {
my ($self, $commit_id, $module_version_id) = @_;
$self->{st_mark_as_added} ||= $self->{dbh}->prepare('UPDATE commits_module_versions SET added = 1 WHERE commit_id = ? AND module_version_id = ?');
$self->{st_mark_as_added}->execute($commit_id, $module_version_id);
}
sub _mark_as_modified($$$) {
my ($self, $commit_id, $module_version_id) = @_;
$self->{st_mark_as_modified} ||= $self->{dbh}->prepare('UPDATE commits_module_versions SET modified = 1 WHERE commit_id = ? AND module_version_id = ?');
$self->{st_mark_as_modified}->execute($commit_id, $module_version_id);
}
sub _metrics_sql($@) {
my ($metrics_hash, @metric_columns) = @_;
my $metric_placeholders_sql = join(',', map { '?' } @metric_columns);
my $metric_column_names_sql = join(',', @metric_columns);
my @metric_values = map { $metrics_hash->{$_} } @metric_columns;
return ($metric_placeholders_sql, $metric_column_names_sql, @metric_values)
}
# Initializes the database
#
# TODO the current approach of feeding DDL SQL statements directly to the
# database handle might not be good enough, since the SQL being written might
# not be portable to other databases than SQLite. It would be nice to have
# something similar to the rails migrations DSL here.
sub initialize($) {
my ($self) = @_;
$self->{dbh} = DBI->connect($self->database, undef, undef, { RaiseError => 1, InactiveDestroy => 1});
# assume that if there is a table called `analizo_metadata`, then the database was already initialized
if (!grep { $_ =~ /analizo_metadata/ } $self->{dbh}->tables()) {
for my $statement (ddl_statements()) {
$statement =~ s/\@\@MODULE_METRICS\@\@/_metric_columns_ddl(_module_metric_columns())/egm;
$statement =~ s/\@\@PROJECT_METRICS\@\@/_metric_columns_ddl(_project_metric_columns())/egm;
$statement =~ s/\@\@NUMERIC_AUTOINC_PK\@\@/_numeric_autoinc_pk($self->database)/egm;
$self->{dbh}->do($statement);
}
}
}
my $DDL_INITIALIZED = 0;
my @DDL_STATEMENTS = ();
sub ddl_statements($) {
if (!$DDL_INITIALIZED) {
my $sql = '';
while (my $line = <DATA>) {
$sql .= $line;
if ($line =~ /;\s*$/) {
# SQL statement is ready
CORE::push @DDL_STATEMENTS, $sql;
$sql = '';
}
}
$DDL_INITIALIZED = 1;
}
return @DDL_STATEMENTS;
}
use Analizo::Metrics;
use Analizo::Model;
my $__metric_columns = undef;
sub _metric_columns() {
if (!$__metric_columns) {
my $metrics = Analizo::Metrics->new(model => Analizo::Model->new);
my %module_metrics = $metrics->list_of_metrics();
my @module_metrics= keys(%module_metrics);
my %project_metrics = $metrics->list_of_global_metrics();
my @project_metrics = keys(%project_metrics);
$__metric_columns = {
module => \@module_metrics,
project => \@project_metrics,
};
}
return $__metric_columns;
}
sub _module_metric_columns() {
return @{ _metric_columns()->{module} };
}
sub _project_metric_columns() {
return @{ _metric_columns()->{project} };
}
sub _metric_columns_ddl {
my @columns = @_;
my $ddl = join(",\n ", map { "$_ REAL"} @columns);
return $ddl;
}
my %NUMERIC_AUTOINC_PK = (
'sqlite' => 'INTEGER PRIMARY KEY AUTOINCREMENT',
'pg' => 'SERIAL PRIMARY KEY',
);
sub _numeric_autoinc_pk($) {
my ($database) = @_;
my $dbtype = lc($database);
$dbtype =~ s/^dbi:(.*):.*/$1/;
my $sql = $NUMERIC_AUTOINC_PK{$dbtype};
die("Database $dbtype is not supported!") if (!defined($sql));
return $sql;
}
1;
__DATA__
/* Analizo DB output schema definition */
CREATE TABLE projects (
id @@NUMERIC_AUTOINC_PK@@,
name CHAR(250)
);
CREATE TABLE developers (
id @@NUMERIC_AUTOINC_PK@@,
name CHAR(250),
email CHAR(250)
);
CREATE TABLE commits (
id CHAR(40) PRIMARY KEY,
previous_commit_id CHAR(40),
project_id INTEGER,
date INTEGER,
developer_id INTEGER,
@@PROJECT_METRICS@@
);
CREATE INDEX commits_project_id ON commits (project_id);
CREATE TABLE modules (
id @@NUMERIC_AUTOINC_PK@@,
project_id INTEGER,
name CHAR(250)
);
CREATE UNIQUE INDEX modules_project_id_name ON modules(project_id, name);
CREATE TABLE module_versions (
id CHAR(40),
module_id INTEGER,
@@MODULE_METRICS@@,
PRIMARY KEY (id, module_id)
);
CREATE TABLE commits_module_versions (
id @@NUMERIC_AUTOINC_PK@@,
commit_id CHAR(40),
module_version_id CHAR(40),
modified INTEGER default 0,
added INTEGER default 0
);
CREATE INDEX commits_module_versions_commit_id ON commits_module_versions (commit_id);
CREATE TABLE analizo_metadata (
key CHAR(100),
value CHAR(100)
);