そういやURIモジュールって何でHashベースじゃないんだろう

URIモジュールってのは凄く便利なのでみんな重宝してるんだけどアレってスカラリファレンスのblessなんだよね。

いつも思うのが何でハッシュリファレンスにしなかったのかなーってこと。

まぁURIモジュール自体、かなり汎用的なモジュールなのでスカラの方がデータ構造をあれこれ考えなくても良い分、楽だったのかもしれない。

とはいえ実際問題、URIモジュールの使用用途ってURI::httpがその殆どを占めてると思うのでちょっくら適当にHashベースのURI::httpを実装してみた。もちろん全部じゃなくていくつかのメソッドだけね。

package URI::Hash::http;
use Moose;
use URI();
use URI::Escape();
use overload (
    # Operators without an explicit handler are derived from the ones below
    # (or fall back to Perl's normal behaviour).
    fallback => 1,
    # An object used as a string yields the complete URL.
    '""'     => sub { shift->as_string },
    # Two operands are equal when their string forms match.  Both sides are
    # stringified instead of being asked for ->as_string, so comparing with a
    # plain string (which has no methods) does not die.
    '=='     => sub { "$_[0]" eq "$_[1]" },
);

# URL components.  BUILD fills them in from the string passed to new(); all
# are plain read/write accessors without type constraints.
has scheme       => ( is => 'rw' );
has authority    => ( is => 'rw' );
has host         => ( is => 'rw' );
has port         => ( is => 'rw' );
# Port stored by BUILD when the URL does not name one.
has default_port => ( is => 'rw' , default => 80 );
has path         => ( is => 'rw' );
# Query string in its escaped, textual form (without the leading "?").
has query        => ( is => 'rw' );
has fragment     => ( is => 'rw' );
# Query string as a hash reference (key => value).  A reference default has
# to be wrapped in a code reference for Moose, which also gives every object
# its own fresh hash.
has _query       => ( is => 'rw' , default => sub { {} } );

# Moose constructor hook: turn the single positional argument of new($url)
# into the { url => ... } hash reference that is handed on to BUILD.
# Any further arguments are ignored.
sub BUILDARGS {
    my ( $class, $url ) = @_;
    my %args = ( url => $url );
    return \%args;
}

# Moose post-construction hook: split the URL given to new() into its
# components and store them in the attributes.
#
# Arguments: the object, and the hash reference produced by BUILDARGS
# ({ url => $string }).
# A string that is not an http/https URL leaves every attribute untouched;
# nothing is thrown.
sub BUILD {
    my ( $self, $args ) = @_;
    my $url = $args->{url};
    return unless defined $url;

    # Copy the captures into lexicals at once so later method calls cannot
    # disturb them.  The path is optional, which lets "http://example.com"
    # and "http://example.com?x=1" parse as well.
    my ( $scheme, $authority, $host, $port, $path, $query, $fragment ) =
        $url =~ m{
            \A (https?) ://
            ( ( [^/:?\#]+ ) (?: : (\d+) )? )   # authority: host plus optional port
            ( / [^?\#]* )?                     # path including its leading slash
            (?: \? ( [^\#]* ) )?               # query without the question mark
            (?: \# ( .* ) )?                   # fragment without the hash sign
        \z}xs
        or return;

    $self->scheme($scheme);
    $self->authority($authority);
    $self->host($host);

    # https has its own well-known port; without this an https URL lacking an
    # explicit port would report 80.
    $self->default_port(443) if $scheme eq 'https';
    $self->port( $port || $self->default_port );

    # Always define the path, so "/" is kept and "/0" is not mistaken for
    # "no path".
    $self->path( defined $path ? $path : q{} );
    $self->fragment($fragment) if defined $fragment;

    if ( defined $query && length $query ) {
        # Keep the decoded key/value pairs ("+" is a space, %XX is resolved).
        # query_form() escapes the values it is given, so storing them still
        # encoded would double-escape on a round trip.
        my %form = map { tr/+/ /; URI::Escape::uri_unescape($_) }
                   map { /=/ ? split( /=/, $_, 2 ) : ( $_ => q{} ) }
                   split /&/, $query;
        $self->_query( \%form );

        # The textual query keeps its escapes; anything outside $URI::uric is
        # replaced through the %URI::Escape::escapes table.
        $query =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
        $self->query($query);
    }
    return;
}

# Return the path, followed by "?" and the query string when one is present.
sub path_query {
    my $self = shift;

    # The path accessor may return undef when nothing was parsed into it;
    # treat that as empty instead of concatenating undef.
    my $path = $self->path;
    $path = q{} unless defined $path;

    # Test for presence rather than truth so that a query of "0" survives.
    my $query = $self->query;
    return $path unless defined $query && length $query;
    return $path . q{?} . $query;
}

# "host:port" as one string, assembled from the two accessors.
sub host_port {
    my ($self) = @_;
    return join q{:}, $self->host, $self->port;
}

# The part of the URL between "scheme:" and the fragment:
# "//" followed by the authority and the result of path_query.
sub opaque {
    my ($self) = @_;
    my $authority  = $self->authority;
    my $path_query = $self->path_query;
    return join q{}, q{//}, $authority, $path_query;
}

# Return the whole URL: "scheme:" . opaque, plus "#fragment" when a
# non-empty fragment is set.  Also used by the stringification overload.
sub as_string {
    my $self = shift;
    my $str  = $self->scheme . q{:} . $self->opaque;

    # Check for presence instead of truth so that a fragment of "0" is kept.
    my $fragment = $self->fragment;
    return $str unless defined $fragment && length $fragment;
    return $str . q{#} . $fragment;
}

# Get or replace the query as key/value pairs.
#
#   query_form()              getter: the stored pairs
#   query_form(\%form)        setter: replace the query with %form
#   query_form(k => v, ...)   setter: same, pairs given as a list
#
# A value may be an array reference, which emits the key once per element.
# The setter rebuilds the textual query (see the query accessor) and stores
# the given hash as the new form.
#
# Returns the pairs as a flat list in list context and as a hash reference
# otherwise.
sub query_form {
    my $self = shift;

    unless (@_) {
        # _query can be unset (e.g. nothing was parsed); hand back an empty
        # form instead of dereferencing undef.
        my $current = $self->_query || {};
        return wantarray ? %$current : $current;
    }

    my $param = @_ == 1 ? shift : {@_};
    my @query;

    # Sorted so that the generated query string is reproducible; plain hash
    # order differs from run to run.
    for my $name ( sort keys %$param ) {
        # Escape characters that are significant inside a query, then write
        # spaces as "+".
        ( my $key = $name ) =~ s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/g;
        $key =~ s/ /+/g;

        # Work on a copy: the loop variable is modified below and must not
        # alias the caller's data.
        my $vals = $param->{$name};
        my @vals = ref($vals) eq "ARRAY" ? @$vals : ($vals);
        for my $val (@vals) {
            $val = '' unless defined $val;
            $val =~ s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/g;
            $val =~ s/ /+/g;
            push @query, "$key=$val";
        }
    }

    my $query = join '&', @query;
    # Anything still outside $URI::uric is replaced through the
    # %URI::Escape::escapes table.
    $query =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
    $self->query($query);

    # Store the new form in every calling context; previously a list-context
    # call skipped this and left the old pairs behind.
    $self->_query($param);
    return wantarray ? %$param : $param;
}

# Remove the Moose keywords (has, ...) from the package namespace and make
# the class immutable.
no Moose;
__PACKAGE__->meta->make_immutable;

# A module loaded with use/require has to end on a true value; state it
# explicitly instead of depending on what make_immutable returns.
1;

で、このURI::Hash::httpとURI::httpのベンチを取ってみると・・・

use URI;
use URI::http;
use URI::Hash::http;
use Benchmark qw(cmpthese timethese :hireswallclock);

# Each implementation parses the same URL once, up front; only method calls on
# the ready-made objects are timed.
my $url = 'http://example.com/foo/bar/00000.html?uid=aaaa&hoge=あああ';
my $uri_hash   = URI::Hash::http->new($url);
my $uri_nohash = URI::http->new($url);

# Build the timed closure for one URI object, so that both contestants run
# exactly the same sequence of calls.
my $workload_for = sub {
    my ($uri) = @_;
    return sub {
        $uri->scheme;
        $uri->opaque;
        $uri->path;
        $uri->fragment ;
        $uri->as_string;
        $uri->authority ;
        $uri->path_query;
        $uri->query ;
        { $uri->query_form };
        $uri->query_form({ hoge => 'あああ' });
        $uri->query ;
        { $uri->query_form };
        $uri->host;
        $uri->port;
        $uri->host_port;
        $uri->default_port;
        $uri->scheme.'://'.$uri->host.$uri->path;
    };
};

# 1000 iterations per contestant; cmpthese prints the comparison table.
cmpthese(1000,{
    uri_hash   => $workload_for->($uri_hash),
    uri_nohash => $workload_for->($uri_nohash),
});

__END__

             Rate uri_nohash   uri_hash
uri_nohash 4566/s         --       -43%
uri_hash   8000/s        75%         --

こんな結果に。やっぱ毎回正規表現でパースしない分、Hashベースの方が速い。

ちなみにアクセサの組み合わせだけで実装されてるようなメソッドのみでベンチを取ってみたら

              Rate uri_nohash   uri_hash
uri_nohash  6410/s         --       -50%
uri_hash   12821/s       100%         --

と、素晴らしく速い。

absやrelメソッドとかを実装しだすともしかしたら遅くなっていくかもしれないけども、アクセサ系の使用頻度の方が高い気もするし、んー、どうだろねぇ・・・。