diff --git a/modules/Mojo/Asset.pm b/modules/Mojo/Asset.pm index 0174ad8..1666e12 100644 --- a/modules/Mojo/Asset.pm +++ b/modules/Mojo/Asset.pm @@ -1,145 +1,145 @@ package Mojo::Asset; use Mojo::Base 'Mojo::EventEmitter'; -use Carp 'croak'; +use Carp qw(croak); has 'end_range'; has start_range => 0; sub add_chunk { croak 'Method "add_chunk" not implemented by subclass' } sub contains { croak 'Method "contains" not implemented by subclass' } sub get_chunk { croak 'Method "get_chunk" not implemented by subclass' } sub is_file {undef} sub is_range { !!($_[0]->end_range || $_[0]->start_range) } sub move_to { croak 'Method "move_to" not implemented by subclass' } sub mtime { croak 'Method "mtime" not implemented by subclass' } sub size { croak 'Method "size" not implemented by subclass' } sub slurp { croak 'Method "slurp" not implemented by subclass' } sub to_file { croak 'Method "to_file" not implemented by subclass' } 1; =encoding utf8 =head1 NAME Mojo::Asset - HTTP content storage base class =head1 SYNOPSIS package Mojo::Asset::MyAsset; use Mojo::Base 'Mojo::Asset'; sub add_chunk {...} sub contains {...} sub get_chunk {...} sub move_to {...} sub mtime {...} sub size {...} sub slurp {...} sub to_file {...} =head1 DESCRIPTION L is an abstract base class for HTTP content storage backends, like L and L. =head1 EVENTS L inherits all events from L. =head1 ATTRIBUTES L implements the following attributes. =head2 end_range my $end = $asset->end_range; $asset = $asset->end_range(8); Pretend file ends earlier. =head2 start_range my $start = $asset->start_range; $asset = $asset->start_range(3); Pretend file starts later. =head1 METHODS L inherits all methods from L and implements the following new ones. =head2 add_chunk $asset = $asset->add_chunk('foo bar baz'); Add chunk of data to asset. Meant to be overloaded in a subclass. =head2 contains my $position = $asset->contains('bar'); Check if asset contains a specific string. Meant to be overloaded in a subclass. =head2 get_chunk my $bytes = $asset->get_chunk($offset); my $bytes = $asset->get_chunk($offset, $max); Get chunk of data starting from a specific position, defaults to a maximum chunk size of C<131072> bytes (128KiB). Meant to be overloaded in a subclass. =head2 is_file my $bool = $asset->is_file; False, this is not a L object. =head2 is_range my $bool = $asset->is_range; Check if asset has a L or L. =head2 move_to $asset = $asset->move_to('/home/sri/foo.txt'); Move asset data into a specific file. Meant to be overloaded in a subclass. =head2 mtime my $mtime = $asset->mtime; Modification time of asset. Meant to be overloaded in a subclass. =head2 size my $size = $asset->size; Size of asset data in bytes. Meant to be overloaded in a subclass. =head2 slurp my $bytes = $asset->slurp; Read all asset data at once. Meant to be overloaded in a subclass. =head2 to_file my $file = $asset->to_file; Convert asset to L object. Meant to be overloaded in a subclass. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Asset/File.pm b/modules/Mojo/Asset/File.pm index 75966de..634ce31 100644 --- a/modules/Mojo/Asset/File.pm +++ b/modules/Mojo/Asset/File.pm @@ -1,273 +1,273 @@ package Mojo::Asset::File; use Mojo::Base 'Mojo::Asset'; -use Carp 'croak'; -use Fcntl 'SEEK_SET'; +use Carp qw(croak); +use Fcntl qw(SEEK_SET); use File::Spec::Functions (); -use Mojo::File 'tempfile'; +use Mojo::File qw(tempfile); has [qw(cleanup path)]; has handle => sub { my $self = shift; # Open existing file my $path = $self->path; return Mojo::File->new($path)->open('<') if defined $path && -e $path; $self->cleanup(1) unless defined $self->cleanup; # Create a specific file return Mojo::File->new($path)->open('+>>') if defined $path; # Create a temporary file my $template = 'mojo.tmp.XXXXXXXXXXXXXXXX'; my $file = tempfile DIR => $self->tmpdir, TEMPLATE => $template, UNLINK => 0; $self->path($file->to_string); return $file->open('+>>'); }; has tmpdir => sub { $ENV{MOJO_TMPDIR} || File::Spec::Functions::tmpdir }; sub DESTROY { my $self = shift; return unless $self->cleanup && defined(my $path = $self->path); if (my $handle = $self->handle) { close $handle } # Only the process that created the file is allowed to remove it Mojo::File->new($path)->remove if -w $path && ($self->{pid} // $$) == $$; } sub add_chunk { my ($self, $chunk) = @_; ($self->handle->syswrite($chunk) // -1) == length $chunk or croak "Can't write to asset: $!"; return $self; } sub contains { my ($self, $str) = @_; my $handle = $self->handle; $handle->sysseek($self->start_range, SEEK_SET); # Calculate window size my $end = $self->end_range // $self->size; my $len = length $str; my $size = $len > 131072 ? $len : 131072; $size = $end - $self->start_range if $size > $end - $self->start_range; # Sliding window search my $offset = 0; my $start = $handle->sysread(my $window, $len); while ($offset < $end) { # Read as much as possible my $diff = $end - ($start + $offset); my $read = $handle->sysread(my $buffer, $diff < $size ? $diff : $size); $window .= $buffer; # Search window my $pos = index $window, $str; return $offset + $pos if $pos >= 0; - return -1 if $read == 0 || ($offset += $read) == $end; + return -1 if $read == 0 || ($offset += $read) == $end; # Resize window substr $window, 0, $read, ''; } return -1; } sub get_chunk { my ($self, $offset, $max) = @_; $max //= 131072; $offset += $self->start_range; my $handle = $self->handle; $handle->sysseek($offset, SEEK_SET); my $buffer; if (defined(my $end = $self->end_range)) { return '' if (my $chunk = $end + 1 - $offset) <= 0; $handle->sysread($buffer, $chunk > $max ? $max : $chunk); } else { $handle->sysread($buffer, $max) } return $buffer; } sub is_file {1} sub move_to { my ($self, $to) = @_; # Windows requires that the handle is closed close $self->handle; delete $self->{handle}; # Move file and prevent clean up Mojo::File->new($self->path)->move_to($to); return $self->path($to)->cleanup(0); } sub mtime { (stat shift->handle)[9] } sub new { my $file = shift->SUPER::new(@_); $file->{pid} = $$; return $file; } sub size { -s shift->handle } sub slurp { my $handle = shift->handle; $handle->sysseek(0, SEEK_SET); my $ret = my $content = ''; while ($ret = $handle->sysread(my $buffer, 131072, 0)) { $content .= $buffer } return defined $ret ? $content : croak "Can't read from asset: $!"; } sub to_file {shift} 1; =encoding utf8 =head1 NAME Mojo::Asset::File - File storage for HTTP content =head1 SYNOPSIS use Mojo::Asset::File; # Temporary file my $file = Mojo::Asset::File->new; $file->add_chunk('foo bar baz'); say 'File contains "bar"' if $file->contains('bar') >= 0; say $file->slurp; # Existing file my $file = Mojo::Asset::File->new(path => '/home/sri/foo.txt'); $file->move_to('/yada.txt'); say $file->slurp; =head1 DESCRIPTION L is a file storage backend for HTTP content. =head1 EVENTS L inherits all events from L. =head1 ATTRIBUTES L inherits all attributes from L and implements the following new ones. =head2 cleanup my $bool = $file->cleanup; $file = $file->cleanup($bool); Delete L automatically once the file is not used anymore. =head2 handle my $handle = $file->handle; $file = $file->handle(IO::File->new); Filehandle, created on demand for L, which can be generated automatically and safely based on L. =head2 path my $path = $file->path; $file = $file->path('/home/sri/foo.txt'); File path used to create L. =head2 tmpdir my $tmpdir = $file->tmpdir; $file = $file->tmpdir('/tmp'); Temporary directory used to generate L, defaults to the value of the C environment variable or auto-detection. =head1 METHODS L inherits all methods from L and implements the following new ones. =head2 add_chunk $file = $file->add_chunk('foo bar baz'); Add chunk of data. =head2 contains my $position = $file->contains('bar'); Check if asset contains a specific string. =head2 get_chunk my $bytes = $file->get_chunk($offset); my $bytes = $file->get_chunk($offset, $max); Get chunk of data starting from a specific position, defaults to a maximum chunk size of C<131072> bytes (128KiB). =head2 is_file my $bool = $file->is_file; True, this is a L object. =head2 move_to $file = $file->move_to('/home/sri/bar.txt'); Move asset data into a specific file and disable L. =head2 mtime my $mtime = $file->mtime; Modification time of asset. =head2 new my $file = Mojo::Asset::File->new; my $file = Mojo::Asset::File->new(path => '/home/sri/test.txt'); my $file = Mojo::Asset::File->new({path => '/home/sri/test.txt'}); Construct a new L object. =head2 size my $size = $file->size; Size of asset data in bytes. =head2 slurp my $bytes = $file->slurp; Read all asset data at once. =head2 to_file $file = $file->to_file; Does nothing but return the invocant, since we already have a L object. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Asset/Memory.pm b/modules/Mojo/Asset/Memory.pm index 2d10cef..ee5a29e 100644 --- a/modules/Mojo/Asset/Memory.pm +++ b/modules/Mojo/Asset/Memory.pm @@ -1,174 +1,174 @@ package Mojo::Asset::Memory; use Mojo::Base 'Mojo::Asset'; use Mojo::Asset::File; -use Mojo::File 'path'; +use Mojo::File qw(path); has 'auto_upgrade'; has max_memory_size => sub { $ENV{MOJO_MAX_MEMORY_SIZE} || 262144 }; has mtime => sub {$^T}; sub add_chunk { my ($self, $chunk) = @_; # Upgrade if necessary $self->{content} .= $chunk; return $self if !$self->auto_upgrade || $self->size <= $self->max_memory_size; $self->emit(upgrade => my $file = $self->to_file); return $file; } sub contains { my ($self, $str) = @_; my $start = $self->start_range; my $pos = index $self->{content} // '', $str, $start; $pos -= $start if $start && $pos >= 0; my $end = $self->end_range; return $end && ($pos + length $str) >= $end ? -1 : $pos; } sub get_chunk { my ($self, $offset, $max) = @_; $max //= 131072; $offset += $self->start_range; if (my $end = $self->end_range) { $max = $end + 1 - $offset if ($offset + $max) > $end; } return substr shift->{content} // '', $offset, $max; } sub move_to { path($_[1])->spurt($_[0]{content} // '') and return $_[0] } sub size { length(shift->{content} // '') } sub slurp { shift->{content} // '' } sub to_file { Mojo::Asset::File->new->add_chunk(shift->slurp) } 1; =encoding utf8 =head1 NAME Mojo::Asset::Memory - In-memory storage for HTTP content =head1 SYNOPSIS use Mojo::Asset::Memory; my $mem = Mojo::Asset::Memory->new; $mem->add_chunk('foo bar baz'); say $mem->slurp; =head1 DESCRIPTION L is an in-memory storage backend for HTTP content. =head1 EVENTS L inherits all events from L and can emit the following new ones. =head2 upgrade $mem->on(upgrade => sub { my ($mem, $file) = @_; ... }); Emitted when asset gets upgraded to a L object. $mem->on(upgrade => sub { my ($mem, $file) = @_; $file->tmpdir('/tmp'); }); =head1 ATTRIBUTES L inherits all attributes from L and implements the following new ones. =head2 auto_upgrade my $bool = $mem->auto_upgrade; $mem = $mem->auto_upgrade($bool); Try to detect if content size exceeds L limit and automatically upgrade to a L object. =head2 max_memory_size my $size = $mem->max_memory_size; $mem = $mem->max_memory_size(1024); Maximum size in bytes of data to keep in memory before automatically upgrading to a L object, defaults to the value of the C environment variable or C<262144> (256KiB). =head2 mtime my $mtime = $mem->mtime; $mem = $mem->mtime(1408567500); Modification time of asset, defaults to the value of C<$^T>. =head1 METHODS L inherits all methods from L and implements the following new ones. =head2 add_chunk $mem = $mem->add_chunk('foo bar baz'); my $file = $mem->add_chunk('abc' x 262144); Add chunk of data and upgrade to L object if necessary. =head2 contains my $position = $mem->contains('bar'); Check if asset contains a specific string. =head2 get_chunk my $bytes = $mem->get_chunk($offset); my $bytes = $mem->get_chunk($offset, $max); Get chunk of data starting from a specific position, defaults to a maximum chunk size of C<131072> bytes (128KiB). =head2 move_to $mem = $mem->move_to('/home/sri/foo.txt'); Move asset data into a specific file. =head2 size my $size = $mem->size; Size of asset data in bytes. =head2 slurp my $bytes = mem->slurp; Read all asset data at once. =head2 to_file my $file = $mem->to_file; Convert asset to L object. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Base.pm b/modules/Mojo/Base.pm index 8442b58..25b93f5 100644 --- a/modules/Mojo/Base.pm +++ b/modules/Mojo/Base.pm @@ -1,419 +1,420 @@ package Mojo::Base; use strict; use warnings; use utf8; use feature ':5.10'; use mro; # No imports because we get subclassed, a lot! use Carp (); use Scalar::Util (); # Defer to runtime so Mojo::Util can use "-strict" require Mojo::Util; # Only Perl 5.14+ requires it on demand use IO::Handle (); # Role support requires Role::Tiny 2.000001+ use constant ROLES => !!(eval { require Role::Tiny; Role::Tiny->VERSION('2.000001'); 1 }); -# async/await support requires Future::AsyncAwait 0.35+ +# async/await support requires Future::AsyncAwait 0.36+ use constant ASYNC => $ENV{MOJO_NO_ASYNC} ? 0 : !!(eval { require Future::AsyncAwait; - Future::AsyncAwait->VERSION('0.35'); + Future::AsyncAwait->VERSION('0.36'); 1; }); # Protect subclasses using AUTOLOAD sub DESTROY { } sub attr { my ($self, $attrs, $value, %kv) = @_; return unless (my $class = ref $self || $self) && $attrs; Carp::croak 'Default has to be a code reference or constant value' if ref $value && ref $value ne 'CODE'; Carp::croak 'Unsupported attribute option' if grep { $_ ne 'weak' } keys %kv; # Weaken if ($kv{weak}) { state %weak_names; unless ($weak_names{$class}) { my $names = $weak_names{$class} = []; my $sub = sub { my $self = shift->next::method(@_); ref $self->{$_} and Scalar::Util::weaken $self->{$_} for @$names; return $self; }; Mojo::Util::monkey_patch(my $base = $class . '::_Base', 'new', $sub); no strict 'refs'; unshift @{"${class}::ISA"}, $base; } push @{$weak_names{$class}}, ref $attrs eq 'ARRAY' ? @$attrs : $attrs; } for my $attr (@{ref $attrs eq 'ARRAY' ? $attrs : [$attrs]}) { Carp::croak qq{Attribute "$attr" invalid} unless $attr =~ /^[a-zA-Z_]\w*$/; # Very performance-sensitive code with lots of micro-optimizations my $sub; if ($kv{weak}) { if (ref $value) { $sub = sub { return exists $_[0]{$attr} ? $_[0]{$attr} : ( ref($_[0]{$attr} = $value->($_[0])) && Scalar::Util::weaken($_[0]{$attr}), $_[0]{$attr} ) if @_ == 1; ref($_[0]{$attr} = $_[1]) and Scalar::Util::weaken($_[0]{$attr}); $_[0]; }; } else { $sub = sub { return $_[0]{$attr} if @_ == 1; ref($_[0]{$attr} = $_[1]) and Scalar::Util::weaken($_[0]{$attr}); $_[0]; }; } } elsif (ref $value) { $sub = sub { return exists $_[0]{$attr} ? $_[0]{$attr} : ($_[0]{$attr} = $value->($_[0])) if @_ == 1; $_[0]{$attr} = $_[1]; $_[0]; }; } elsif (defined $value) { $sub = sub { return exists $_[0]{$attr} ? $_[0]{$attr} : ($_[0]{$attr} = $value) if @_ == 1; $_[0]{$attr} = $_[1]; $_[0]; }; } else { $sub = sub { return $_[0]{$attr} if @_ == 1; $_[0]{$attr} = $_[1]; $_[0] }; } Mojo::Util::monkey_patch($class, $attr, $sub); } } sub import { my ($class, $caller) = (shift, caller); return unless my @flags = @_; # Mojo modules are strict! $_->import for qw(strict warnings utf8); feature->import(':5.10'); while (my $flag = shift @flags) { # Base if ($flag eq '-base') { push @flags, $class } # Role elsif ($flag eq '-role') { Carp::croak 'Role::Tiny 2.000001+ is required for roles' unless ROLES; Mojo::Util::monkey_patch($caller, 'has', sub { attr($caller, @_) }); eval "package $caller; use Role::Tiny; 1" or die $@; } # async/await - elsif ($flag eq '-async') { - Carp::croak 'Future::AsyncAwait 0.35+ is required for async/await' + elsif ($flag eq '-async_await') { + Carp::croak 'Future::AsyncAwait 0.36+ is required for async/await' unless ASYNC; + require Mojo::Promise; Future::AsyncAwait->import_into($caller, future_class => 'Mojo::Promise'); } # Signatures (Perl 5.20+) elsif ($flag eq '-signatures') { Carp::croak 'Subroutine signatures require Perl 5.20+' if $] < 5.020; require experimental; experimental->import('signatures'); } # Module elsif ($flag !~ /^-/) { no strict 'refs'; require(Mojo::Util::class_to_path($flag)) unless $flag->can('new'); push @{"${caller}::ISA"}, $flag; Mojo::Util::monkey_patch($caller, 'has', sub { attr($caller, @_) }); } elsif ($flag ne '-strict') { Carp::croak "Unsupported flag: $flag" } } } sub new { my $class = shift; bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class || $class; } sub tap { my ($self, $cb) = (shift, shift); $_->$cb(@_) for $self; return $self; } sub with_roles { Carp::croak 'Role::Tiny 2.000001+ is required for roles' unless ROLES; my ($self, @roles) = @_; return $self unless @roles; return Role::Tiny->create_class_with_roles($self, map { /^\+(.+)$/ ? "${self}::Role::$1" : $_ } @roles) unless my $class = Scalar::Util::blessed $self; return Role::Tiny->apply_roles_to_object($self, map { /^\+(.+)$/ ? "${class}::Role::$1" : $_ } @roles); } 1; =encoding utf8 =head1 NAME Mojo::Base - Minimal base class for Mojo projects =head1 SYNOPSIS package Cat; use Mojo::Base -base; has name => 'Nyan'; has ['age', 'weight'] => 4; package Tiger; use Mojo::Base 'Cat'; has friend => sub { Cat->new }; has stripes => 42; package main; use Mojo::Base -strict; my $mew = Cat->new(name => 'Longcat'); say $mew->age; say $mew->age(3)->weight(5)->age; my $rawr = Tiger->new(stripes => 38, weight => 250); say $rawr->tap(sub { $_->friend->name('Tacgnol') })->weight; =head1 DESCRIPTION L is a simple base class for L projects with fluent interfaces. # Automatically enables "strict", "warnings", "utf8" and Perl 5.10 features use Mojo::Base -strict; use Mojo::Base -base; use Mojo::Base 'SomeBaseClass'; use Mojo::Base -role; All four forms save a lot of typing. Note that role support depends on L (2.000001+). # use Mojo::Base -strict; use strict; use warnings; use utf8; use feature ':5.10'; use mro; use IO::Handle (); # use Mojo::Base -base; use strict; use warnings; use utf8; use feature ':5.10'; use mro; use IO::Handle (); push @ISA, 'Mojo::Base'; sub has { Mojo::Base::attr(__PACKAGE__, @_) } # use Mojo::Base 'SomeBaseClass'; use strict; use warnings; use utf8; use feature ':5.10'; use mro; use IO::Handle (); require SomeBaseClass; push @ISA, 'SomeBaseClass'; sub has { Mojo::Base::attr(__PACKAGE__, @_) } # use Mojo::Base -role; use strict; use warnings; use utf8; use feature ':5.10'; use mro; use IO::Handle (); use Role::Tiny; sub has { Mojo::Base::attr(__PACKAGE__, @_) } On Perl 5.20+ you can also use the C<-signatures> flag with all four forms and enable support for L. # Also enable signatures use Mojo::Base -strict, -signatures; use Mojo::Base -base, -signatures; use Mojo::Base 'SomeBaseClass', -signatures; use Mojo::Base -role, -signatures; -If you have L 0.35+ installed you can also use the C<-async> -flag to activate the C and C keywords to deal much more -efficiently with promises. Note that this feature is B and might -change without warning! +If you have L 0.36+ installed you can also use the +C<-async_await> flag to activate the C and C keywords to deal much +more efficiently with promises. Note that this feature is B and +might change without warning! # Also enable async/await - use Mojo::Base -strict, -async; - use Mojo::Base -base, -signatures, -async; + use Mojo::Base -strict, -async_await; + use Mojo::Base -base, -signatures, -async_await; This will also disable experimental warnings on versions of Perl where this feature was still experimental. =head1 FLUENT INTERFACES Fluent interfaces are a way to design object-oriented APIs around method chaining to create domain-specific languages, with the goal of making the readability of the source code close to written prose. package Duck; use Mojo::Base -base; has 'name'; sub quack { my $self = shift; my $name = $self->name; say "$name: Quack!" } L will help you with this by having all attribute accessors created with L (or L) return their invocant (C<$self>) whenever they are used to assign a new attribute value. Duck->new->name('Donald')->quack; In this case the C attribute accessor is called on the object created by Cnew>. It assigns a new attribute value and then returns the C object, so the C method can be called on it afterwards. These method chains can continue until one of the methods called does not return the C object. =head1 FUNCTIONS L implements the following functions, which can be imported with the C<-base> flag or by setting a base class. =head2 has has 'name'; has ['name1', 'name2', 'name3']; has name => 'foo'; has name => sub {...}; has ['name1', 'name2', 'name3'] => 'foo'; has ['name1', 'name2', 'name3'] => sub {...}; has name => sub {...}, weak => 1; has name => undef, weak => 1; has ['name1', 'name2', 'name3'] => sub {...}, weak => 1; Create attributes for hash-based objects, just like the L method. =head1 METHODS L implements the following methods. =head2 attr $object->attr('name'); SubClass->attr('name'); SubClass->attr(['name1', 'name2', 'name3']); SubClass->attr(name => 'foo'); SubClass->attr(name => sub {...}); SubClass->attr(['name1', 'name2', 'name3'] => 'foo'); SubClass->attr(['name1', 'name2', 'name3'] => sub {...}); SubClass->attr(name => sub {...}, weak => 1); SubClass->attr(name => undef, weak => 1); SubClass->attr(['name1', 'name2', 'name3'] => sub {...}, weak => 1); Create attribute accessors for hash-based objects, an array reference can be used to create more than one at a time. Pass an optional second argument to set a default value, it should be a constant or a callback. The callback will be executed at accessor read time if there's no set value, and gets passed the current instance of the object as first argument. Accessors can be chained, that means they return their invocant when they are called with an argument. These options are currently available: =over 2 =item weak weak => $bool Weaken attribute reference to avoid L and memory leaks. =back =head2 new my $object = SubClass->new; my $object = SubClass->new(name => 'value'); my $object = SubClass->new({name => 'value'}); This base class provides a basic constructor for hash-based objects. You can pass it either a hash or a hash reference with attribute values. =head2 tap $object = $object->tap(sub {...}); $object = $object->tap('some_method'); $object = $object->tap('some_method', @args); Tap into a method chain to perform operations on an object within the chain (also known as a K combinator or Kestrel). The object will be the first argument passed to the callback, and is also available as C<$_>. The callback's return value will be ignored; instead, the object (the callback's first argument) will be the return value. In this way, arbitrary code can be used within (i.e., spliced or tapped into) a chained set of object method calls. # Longer version $object = $object->tap(sub { $_->some_method(@args) }); # Inject side effects into a method chain $object->foo('A')->tap(sub { say $_->foo })->foo('B'); =head2 with_roles my $new_class = SubClass->with_roles('SubClass::Role::One'); my $new_class = SubClass->with_roles('+One', '+Two'); $object = $object->with_roles('+One', '+Two'); Create a new class with one or more L roles. If called on a class returns the new class, or if called on an object reblesses the object into the new class. For roles following the naming scheme C you can use the shorthand C<+RoleName>. Note that role support depends on L (2.000001+). # Create a new class with the role "SubClass::Role::Foo" and instantiate it my $new_class = SubClass->with_roles('+Foo'); my $object = $new_class->new; =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/ByteStream.pm b/modules/Mojo/ByteStream.pm index 5302aaf..306ac64 100644 --- a/modules/Mojo/ByteStream.pm +++ b/modules/Mojo/ByteStream.pm @@ -1,393 +1,401 @@ package Mojo::ByteStream; use Mojo::Base -strict; use overload bool => sub {1}, '""' => sub { ${$_[0]} }, fallback => 1; -use Exporter 'import'; +use Exporter qw(import); use Mojo::Collection; use Mojo::Util; our @EXPORT_OK = ('b'); # Turn most functions from Mojo::Util into methods my @UTILS = ( qw(b64_decode b64_encode camelize decamelize gunzip gzip hmac_sha1_sum), - qw(html_unescape md5_bytes md5_sum punycode_decode punycode_encode quote), - qw(sha1_bytes sha1_sum slugify term_escape trim unindent unquote url_escape), - qw(url_unescape xml_escape xor_encode) + qw(html_unescape humanize_bytes md5_bytes md5_sum punycode_decode), + qw(punycode_encode quote sha1_bytes sha1_sum slugify term_escape trim), + qw(unindent unquote url_escape url_unescape xml_escape xor_encode) ); for my $name (@UTILS) { my $sub = Mojo::Util->can($name); Mojo::Util::monkey_patch __PACKAGE__, $name, sub { my $self = shift; $$self = $sub->($$self, @_); return $self; }; } sub b { __PACKAGE__->new(@_) } sub clone { $_[0]->new(${$_[0]}) } sub decode { shift->_delegate(\&Mojo::Util::decode, @_) } sub encode { shift->_delegate(\&Mojo::Util::encode, @_) } sub new { my $class = shift; return bless \(my $dummy = join '', @_), ref $class || $class; } sub say { my ($self, $handle) = @_; $handle ||= \*STDOUT; say $handle $$self; return $self; } sub secure_compare { Mojo::Util::secure_compare ${shift()}, shift } sub size { length ${$_[0]} } sub split { my ($self, $pat, $lim) = (shift, shift, shift // 0); return Mojo::Collection->new(map { $self->new($_) } split $pat, $$self, $lim); } sub tap { shift->Mojo::Base::tap(@_) } sub to_string { ${$_[0]} } sub with_roles { shift->Mojo::Base::with_roles(@_) } sub _delegate { my ($self, $sub) = (shift, shift); $$self = $sub->(shift || 'UTF-8', $$self); return $self; } 1; =encoding utf8 =head1 NAME Mojo::ByteStream - ByteStream =head1 SYNOPSIS use Mojo::ByteStream; # Manipulate bytestream my $stream = Mojo::ByteStream->new('foo_bar_baz'); say $stream->camelize; # Chain methods my $stream = Mojo::ByteStream->new('foo bar baz')->quote; $stream = $stream->unquote->encode('UTF-8')->b64_encode(''); say "$stream"; # Use the alternative constructor - use Mojo::ByteStream 'b'; + use Mojo::ByteStream qw(b); my $stream = b('foobarbaz')->b64_encode('')->say; =head1 DESCRIPTION L is a scalar-based container for bytestreams that provides a more friendly API for many of the functions in L. # Access scalar directly to manipulate bytestream my $stream = Mojo::ByteStream->new('foo'); $$stream .= 'bar'; =head1 FUNCTIONS L implements the following functions, which can be imported individually. =head2 b my $stream = b('test123'); Construct a new scalar-based L object. =head1 METHODS L implements the following methods. =head2 b64_decode $stream = $stream->b64_decode; Base64 decode bytestream with L. =head2 b64_encode $stream = $stream->b64_encode; $stream = $stream->b64_encode("\n"); Base64 encode bytestream with L. # "Zm9vIGJhciBiYXo=" b('foo bar baz')->b64_encode(''); =head2 camelize $stream = $stream->camelize; Camelize bytestream with L. =head2 clone my $stream2 = $stream->clone; Return a new L object cloned from this bytestream. =head2 decamelize $stream = $stream->decamelize; Decamelize bytestream with L. =head2 decode $stream = $stream->decode; $stream = $stream->decode('iso-8859-1'); Decode bytestream with L, defaults to using C. # "♥" b('%E2%99%A5')->url_unescape->decode; =head2 encode $stream = $stream->encode; $stream = $stream->encode('iso-8859-1'); Encode bytestream with L, defaults to using C. # "%E2%99%A5" b('♥')->encode->url_escape; =head2 gunzip $stream = $stream->gunzip; Uncompress bytestream with L. =head2 gzip stream = $stream->gzip; Compress bytestream with L. =head2 hmac_sha1_sum $stream = $stream->hmac_sha1_sum('passw0rd'); Generate HMAC-SHA1 checksum for bytestream with L. # "7fbdc89263974a89210ea71f171c77d3f8c21471" b('foo bar baz')->hmac_sha1_sum('secr3t'); =head2 html_unescape $stream = $stream->html_unescape; Unescape all HTML entities in bytestream with L. # "%3Chtml%3E" b('<html>')->html_unescape->url_escape; +=head2 humanize_bytes + + $stream = $stream->humanize_bytes; + +Turn number of bytes into a simplified human readable format for bytestream with +L. Note that this method is B and +might change without warning! + =head2 md5_bytes $stream = $stream->md5_bytes; Generate binary MD5 checksum for bytestream with L. =head2 md5_sum $stream = $stream->md5_sum; Generate MD5 checksum for bytestream with L. =head2 new my $stream = Mojo::ByteStream->new('test123'); Construct a new scalar-based L object. =head2 punycode_decode $stream = $stream->punycode_decode; Punycode decode bytestream with L. =head2 punycode_encode $stream = $stream->punycode_encode; Punycode encode bytestream with L. =head2 quote $stream = $stream->quote; Quote bytestream with L. =head2 say $stream = $stream->say; $stream = $stream->say(*STDERR); Print bytestream to handle and append a newline, defaults to using C. =head2 secure_compare my $bool = $stream->secure_compare($str); Compare bytestream with L. =head2 sha1_bytes $stream = $stream->sha1_bytes; Generate binary SHA1 checksum for bytestream with L. =head2 sha1_sum $stream = $stream->sha1_sum; Generate SHA1 checksum for bytestream with L. =head2 size my $size = $stream->size; Size of bytestream. =head2 slugify $stream = $stream->slugify; $stream = $stream->slugify($bool); Generate URL slug for bytestream with L. =head2 split my $collection = $stream->split(','); my $collection = $stream->split(',', -1); Turn bytestream into L object containing L objects. # "One,Two,Three" b("one,two,three")->split(',')->map('camelize')->join(','); # "One,Two,Three,,," b("one,two,three,,,")->split(',', -1)->map('camelize')->join(','); =head2 tap $stream = $stream->tap(sub {...}); Alias for L. =head2 term_escape $stream = $stream->term_escape; Escape POSIX control characters in bytestream with L. # Print binary checksum to terminal b('foo')->sha1_bytes->term_escape->say; =head2 to_string my $str = $stream->to_string; Stringify bytestream. =head2 trim $stream = $stream->trim; Trim whitespace characters from both ends of bytestream with L. =head2 unindent $stream = $stream->unindent; Unindent bytestream with L. =head2 unquote $stream = $stream->unquote; Unquote bytestream with L. =head2 url_escape $stream = $stream->url_escape; $stream = $stream->url_escape('^A-Za-z0-9\-._~'); Percent encode all unsafe characters in bytestream with L. # "%E2%98%83" b('☃')->encode->url_escape; =head2 url_unescape $stream = $stream->url_unescape; Decode percent encoded characters in bytestream with L. # "<html>" b('%3Chtml%3E')->url_unescape->xml_escape; =head2 with_roles my $new_class = Mojo::ByteStream->with_roles('Mojo::ByteStream::Role::One'); my $new_class = Mojo::ByteStream->with_roles('+One', '+Two'); $stream = $stream->with_roles('+One', '+Two'); Alias for L. =head2 xml_escape $stream = $stream->xml_escape; Escape only the characters C<&>, C>, C>, C<"> and C<'> in bytestream with L. =head2 xor_encode $stream = $stream->xor_encode($key); XOR encode bytestream with L. # "%04%0E%15B%03%1B%10" b('foo bar')->xor_encode('baz')->url_escape; =head1 OPERATORS L overloads the following operators. =head2 bool my $bool = !!$bytestream; Always true. =head2 stringify my $str = "$bytestream"; Alias for L. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Collection.pm b/modules/Mojo/Collection.pm index 1f46622..6bb9e63 100644 --- a/modules/Mojo/Collection.pm +++ b/modules/Mojo/Collection.pm @@ -1,414 +1,414 @@ package Mojo::Collection; use Mojo::Base -strict; -use re 'is_regexp'; -use Carp 'croak'; -use Exporter 'import'; +use re qw(is_regexp); +use Carp qw(croak); +use Exporter qw(import); use List::Util; use Mojo::ByteStream; -use Scalar::Util 'blessed'; +use Scalar::Util qw(blessed); our @EXPORT_OK = ('c'); sub TO_JSON { [@{shift()}] } sub c { __PACKAGE__->new(@_) } sub compact { my $self = shift; return $self->new(grep { defined && (ref || length) } @$self); } sub each { my ($self, $cb) = @_; return @$self unless $cb; my $i = 1; $_->$cb($i++) for @$self; return $self; } sub first { my ($self, $cb) = (shift, shift); return $self->[0] unless $cb; return List::Util::first { $_ =~ $cb } @$self if is_regexp $cb; return List::Util::first { $_->$cb(@_) } @$self; } sub flatten { $_[0]->new(_flatten(@{$_[0]})) } sub grep { my ($self, $cb) = (shift, shift); return $self->new(grep { $_ =~ $cb } @$self) if is_regexp $cb; return $self->new(grep { $_->$cb(@_) } @$self); } sub head { my ($self, $size) = @_; return $self->new(@$self) if $size > @$self; return $self->new(@$self[0 .. ($size - 1)]) if $size >= 0; return $self->new(@$self[0 .. ($#$self + $size)]); } sub join { Mojo::ByteStream->new(join $_[1] // '', map {"$_"} @{$_[0]}); } sub last { shift->[-1] } sub map { my ($self, $cb) = (shift, shift); return $self->new(map { $_->$cb(@_) } @$self); } sub new { my $class = shift; return bless [@_], ref $class || $class; } sub reduce { my $self = shift; @_ = (@_, @$self); goto &List::Util::reduce; } sub reverse { $_[0]->new(reverse @{$_[0]}) } sub shuffle { $_[0]->new(List::Util::shuffle @{$_[0]}) } sub size { scalar @{$_[0]} } sub sort { my ($self, $cb) = @_; return $self->new(sort @$self) unless $cb; my $caller = caller; no strict 'refs'; my @sorted = sort { local (*{"${caller}::a"}, *{"${caller}::b"}) = (\$a, \$b); $a->$cb($b); } @$self; return $self->new(@sorted); } sub tail { my ($self, $size) = @_; return $self->new(@$self) if $size > @$self; return $self->new(@$self[($#$self - ($size - 1)) .. $#$self]) if $size >= 0; return $self->new(@$self[(0 - $size) .. $#$self]); } sub tap { shift->Mojo::Base::tap(@_) } sub to_array { [@{shift()}] } sub uniq { my ($self, $cb) = (shift, shift); my %seen; return $self->new(grep { !$seen{$_->$cb(@_) // ''}++ } @$self) if $cb; return $self->new(grep { !$seen{$_ // ''}++ } @$self); } sub with_roles { shift->Mojo::Base::with_roles(@_) } sub _flatten { map { _ref($_) ? _flatten(@$_) : $_ } @_; } sub _ref { ref $_[0] eq 'ARRAY' || blessed $_[0] && $_[0]->isa(__PACKAGE__) } 1; =encoding utf8 =head1 NAME Mojo::Collection - Collection =head1 SYNOPSIS use Mojo::Collection; # Manipulate collection my $collection = Mojo::Collection->new(qw(just works)); unshift @$collection, 'it'; say $collection->join("\n"); # Chain methods $collection->map(sub { ucfirst })->shuffle->each(sub { my ($word, $num) = @_; say "$num: $word"; }); # Use the alternative constructor - use Mojo::Collection 'c'; + use Mojo::Collection qw(c); c(qw(a b c))->join('/')->url_escape->say; =head1 DESCRIPTION L is an array-based container for collections. # Access array directly to manipulate collection my $collection = Mojo::Collection->new(1 .. 25); $collection->[23] += 100; say for @$collection; =head1 FUNCTIONS L implements the following functions, which can be imported individually. =head2 c my $collection = c(1, 2, 3); Construct a new array-based L object. =head1 METHODS L implements the following methods. =head2 TO_JSON my $array = $collection->TO_JSON; Alias for L. =head2 compact my $new = $collection->compact; Create a new collection with all elements that are defined and not an empty string. # "0, 1, 2, 3" c(0, 1, undef, 2, '', 3)->compact->join(', '); =head2 each my @elements = $collection->each; $collection = $collection->each(sub {...}); Evaluate callback for each element in collection, or return all elements as a list if none has been provided. The element will be the first argument passed to the callback, and is also available as C<$_>. # Make a numbered list $collection->each(sub { my ($e, $num) = @_; say "$num: $e"; }); =head2 first my $first = $collection->first; my $first = $collection->first(qr/foo/); my $first = $collection->first(sub {...}); my $first = $collection->first('some_method'); my $first = $collection->first('some_method', @args); Evaluate regular expression/callback for, or call method on, each element in collection and return the first one that matched the regular expression, or for which the callback/method returned true. The element will be the first argument passed to the callback, and is also available as C<$_>. # Longer version my $first = $collection->first(sub { $_->some_method(@args) }); # Find first value that contains the word "mojo" my $interesting = $collection->first(qr/mojo/i); # Find first value that is greater than 5 my $greater = $collection->first(sub { $_ > 5 }); =head2 flatten my $new = $collection->flatten; Flatten nested collections/arrays recursively and create a new collection with all elements. # "1, 2, 3, 4, 5, 6, 7" c(1, [2, [3, 4], 5, [6]], 7)->flatten->join(', '); =head2 grep my $new = $collection->grep(qr/foo/); my $new = $collection->grep(sub {...}); my $new = $collection->grep('some_method'); my $new = $collection->grep('some_method', @args); Evaluate regular expression/callback for, or call method on, each element in collection and create a new collection with all elements that matched the regular expression, or for which the callback/method returned true. The element will be the first argument passed to the callback, and is also available as C<$_>. # Longer version my $new = $collection->grep(sub { $_->some_method(@args) }); # Find all values that contain the word "mojo" my $interesting = $collection->grep(qr/mojo/i); # Find all values that are greater than 5 my $greater = $collection->grep(sub { $_ > 5 }); =head2 head my $new = $collection->head(4); my $new = $collection->head(-2); Create a new collection with up to the specified number of elements from the beginning of the collection. A negative number will count from the end. # "A B C" c('A', 'B', 'C', 'D', 'E')->head(3)->join(' '); # "A B" c('A', 'B', 'C', 'D', 'E')->head(-3)->join(' '); =head2 join my $stream = $collection->join; my $stream = $collection->join("\n"); Turn collection into L. # Join all values with commas $collection->join(', ')->say; =head2 last my $last = $collection->last; Return the last element in collection. =head2 map my $new = $collection->map(sub {...}); my $new = $collection->map('some_method'); my $new = $collection->map('some_method', @args); Evaluate callback for, or call method on, each element in collection and create a new collection from the results. The element will be the first argument passed to the callback, and is also available as C<$_>. # Longer version my $new = $collection->map(sub { $_->some_method(@args) }); # Append the word "mojo" to all values my $mojoified = $collection->map(sub { $_ . 'mojo' }); =head2 new my $collection = Mojo::Collection->new(1, 2, 3); Construct a new array-based L object. =head2 reduce my $result = $collection->reduce(sub {...}); my $result = $collection->reduce(sub {...}, $initial); Reduce elements in collection with a callback and return its final result, setting C<$a> and C<$b> each time the callback is executed. The first time C<$a> will be set to an optional initial value or the first element in the collection. And from then on C<$a> will be set to the return value of the callback, while C<$b> will always be set to the next element in the collection. # Calculate the sum of all values my $sum = $collection->reduce(sub { $a + $b }); # Count how often each value occurs in collection my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {}); =head2 reverse my $new = $collection->reverse; Create a new collection with all elements in reverse order. =head2 shuffle my $new = $collection->shuffle; Create a new collection with all elements in random order. =head2 size my $size = $collection->size; Number of elements in collection. =head2 sort my $new = $collection->sort; my $new = $collection->sort(sub {...}); Sort elements based on return value of a callback and create a new collection from the results, setting C<$a> and C<$b> to the elements being compared, each time the callback is executed. # Sort values case-insensitive my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) }); =head2 tail my $new = $collection->tail(4); my $new = $collection->tail(-2); Create a new collection with up to the specified number of elements from the end of the collection. A negative number will count from the beginning. # "C D E" c('A', 'B', 'C', 'D', 'E')->tail(3)->join(' '); # "D E" c('A', 'B', 'C', 'D', 'E')->tail(-3)->join(' '); =head2 tap $collection = $collection->tap(sub {...}); Alias for L. =head2 to_array my $array = $collection->to_array; Turn collection into array reference. =head2 uniq my $new = $collection->uniq; my $new = $collection->uniq(sub {...}); my $new = $collection->uniq('some_method'); my $new = $collection->uniq('some_method', @args); Create a new collection without duplicate elements, using the string representation of either the elements or the return value of the callback/method to decide uniqueness. Note that C and empty string are treated the same. # Longer version my $new = $collection->uniq(sub { $_->some_method(@args) }); # "foo bar baz" c('foo', 'bar', 'bar', 'baz')->uniq->join(' '); # "[[1, 2], [2, 1]]" c([1, 2], [2, 1], [3, 2])->uniq(sub{ $_->[1] })->to_array; =head2 with_roles my $new_class = Mojo::Collection->with_roles('Mojo::Collection::Role::One'); my $new_class = Mojo::Collection->with_roles('+One', '+Two'); $collection = $collection->with_roles('+One', '+Two'); Alias for L. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Content.pm b/modules/Mojo/Content.pm index 5cf7854..389ab86 100644 --- a/modules/Mojo/Content.pm +++ b/modules/Mojo/Content.pm @@ -1,595 +1,595 @@ package Mojo::Content; use Mojo::Base 'Mojo::EventEmitter'; -use Carp 'croak'; +use Carp qw(croak); use Compress::Raw::Zlib qw(WANT_GZIP Z_STREAM_END); use Mojo::Headers; -use Scalar::Util 'looks_like_number'; +use Scalar::Util qw(looks_like_number); has [qw(auto_decompress auto_relax relaxed skip_body)]; has headers => sub { Mojo::Headers->new }; has max_buffer_size => sub { $ENV{MOJO_MAX_BUFFER_SIZE} || 262144 }; has max_leftover_size => sub { $ENV{MOJO_MAX_LEFTOVER_SIZE} || 262144 }; my $BOUNDARY_RE = qr!multipart.*boundary\s*=\s*(?:"([^"]+)"|([\w'(),.:?\-+/]+))!i; sub body_contains { croak 'Method "body_contains" not implemented by subclass'; } sub body_size { croak 'Method "body_size" not implemented by subclass' } sub boundary { (shift->headers->content_type // '') =~ $BOUNDARY_RE ? $1 // $2 : undef; } sub charset { my $type = shift->headers->content_type // ''; return $type =~ /charset\s*=\s*"?([^"\s;]+)"?/i ? $1 : undef; } sub clone { my $self = shift; return undef if $self->is_dynamic; return $self->new(headers => $self->headers->clone); } sub generate_body_chunk { my ($self, $offset) = @_; $self->emit(drain => $offset) unless length($self->{body_buffer} //= ''); return delete $self->{body_buffer} if length $self->{body_buffer}; - return '' if $self->{eof}; + return '' if $self->{eof}; my $len = $self->headers->content_length; return looks_like_number $len && $len == $offset ? '' : undef; } sub get_body_chunk { croak 'Method "get_body_chunk" not implemented by subclass'; } sub get_header_chunk { substr shift->_headers->{header_buffer}, shift, 131072 } sub header_size { length shift->_headers->{header_buffer} } sub headers_contain { index(shift->_headers->{header_buffer}, shift) >= 0 } sub is_chunked { !!shift->headers->transfer_encoding } sub is_compressed { lc(shift->headers->content_encoding // '') eq 'gzip' } sub is_dynamic { !!$_[0]{dynamic} } sub is_finished { (shift->{state} // '') eq 'finished' } sub is_limit_exceeded { !!shift->{limit} } sub is_multipart {undef} sub is_parsing_body { (shift->{state} // '') eq 'body' } sub leftovers { shift->{buffer} } sub parse { my $self = shift; # Headers $self->_parse_until_body(@_); return $self if $self->{state} eq 'headers'; # Chunked content $self->{real_size} //= 0; if ($self->is_chunked && $self->{state} ne 'headers') { $self->_parse_chunked; $self->{state} = 'finished' if ($self->{chunk_state} // '') eq 'finished'; } # Not chunked, pass through to second buffer else { $self->{real_size} += length $self->{pre_buffer}; my $limit = $self->is_finished && length($self->{buffer}) > $self->max_leftover_size; $self->{buffer} .= $self->{pre_buffer} unless $limit; $self->{pre_buffer} = ''; } # No content if ($self->skip_body) { $self->{state} = 'finished'; return $self; } # Relaxed parsing my $headers = $self->headers; my $len = $headers->content_length // ''; if ($self->auto_relax && !length $len) { my $connection = lc($headers->connection // ''); $self->relaxed(1) if $connection eq 'close' || !$connection; } # Chunked or relaxed content if ($self->is_chunked || $self->relaxed) { $self->_decompress($self->{buffer} //= ''); $self->{size} += length $self->{buffer}; $self->{buffer} = ''; return $self; } # Normal content $len = 0 unless looks_like_number $len; if ((my $need = $len - ($self->{size} ||= 0)) > 0) { my $len = length $self->{buffer}; my $chunk = substr $self->{buffer}, 0, $need > $len ? $len : $need, ''; $self->_decompress($chunk); $self->{size} += length $chunk; } $self->{state} = 'finished' if $len <= $self->progress; return $self; } sub parse_body { my $self = shift; $self->{state} = 'body'; return $self->parse(@_); } sub progress { my $self = shift; return 0 unless my $state = $self->{state}; return 0 unless $state eq 'body' || $state eq 'finished'; return $self->{raw_size} - ($self->{header_size} || 0); } sub write { my ($self, $chunk, $cb) = @_; $self->{dynamic} = 1; $self->{body_buffer} .= $chunk if defined $chunk; $self->once(drain => $cb) if $cb; $self->{eof} = 1 if defined $chunk && !length $chunk; return $self; } sub write_chunk { my ($self, $chunk, $cb) = @_; $self->headers->transfer_encoding('chunked') unless $self->{chunked}; @{$self}{qw(chunked dynamic)} = (1, 1); $self->{body_buffer} .= $self->_build_chunk($chunk) if defined $chunk; $self->once(drain => $cb) if $cb; $self->{eof} = 1 if defined $chunk && !length $chunk; return $self; } sub _build_chunk { my ($self, $chunk) = @_; # End return "\x0d\x0a0\x0d\x0a\x0d\x0a" unless length $chunk; # First chunk has no leading CRLF my $crlf = $self->{chunks}++ ? "\x0d\x0a" : ''; return $crlf . sprintf('%x', length $chunk) . "\x0d\x0a$chunk"; } sub _decompress { my ($self, $chunk) = @_; # No compression return $self->emit(read => $chunk) unless $self->auto_decompress && $self->is_compressed; # Decompress $self->{post_buffer} .= $chunk; my $gz = $self->{gz} //= Compress::Raw::Zlib::Inflate->new(WindowBits => WANT_GZIP); my $status = $gz->inflate(\$self->{post_buffer}, my $out); $self->emit(read => $out) if defined $out; # Replace Content-Encoding with Content-Length $self->headers->content_length($gz->total_out)->remove('Content-Encoding') if $status == Z_STREAM_END; # Check buffer size @$self{qw(state limit)} = ('finished', 1) if length($self->{post_buffer} // '') > $self->max_buffer_size; } sub _headers { my $self = shift; return $self if defined $self->{header_buffer}; my $headers = $self->headers->to_string; $self->{header_buffer} = $headers ? "$headers\x0d\x0a\x0d\x0a" : "\x0d\x0a"; return $self; } sub _parse_chunked { my $self = shift; # Trailing headers return $self->_parse_chunked_trailing_headers if ($self->{chunk_state} // '') eq 'trailing_headers'; while (my $len = length $self->{pre_buffer}) { # Start new chunk (ignore the chunk extension) unless ($self->{chunk_len}) { last unless $self->{pre_buffer} =~ s/^(?:\x0d?\x0a)?([0-9a-fA-F]+).*\x0a//; next if $self->{chunk_len} = hex $1; # Last chunk $self->{chunk_state} = 'trailing_headers'; last; } # Remove as much as possible from payload $len = $self->{chunk_len} if $self->{chunk_len} < $len; $self->{buffer} .= substr $self->{pre_buffer}, 0, $len, ''; $self->{real_size} += $len; $self->{chunk_len} -= $len; } # Trailing headers $self->_parse_chunked_trailing_headers if ($self->{chunk_state} // '') eq 'trailing_headers'; # Check buffer size @$self{qw(state limit)} = ('finished', 1) if length($self->{pre_buffer} // '') > $self->max_buffer_size; } sub _parse_chunked_trailing_headers { my $self = shift; my $headers = $self->headers->parse(delete $self->{pre_buffer}); return unless $headers->is_finished; $self->{chunk_state} = 'finished'; # Take care of leftover and replace Transfer-Encoding with Content-Length $self->{buffer} .= $headers->leftovers; $headers->remove('Transfer-Encoding'); $headers->content_length($self->{real_size}) unless $headers->content_length; } sub _parse_headers { my $self = shift; my $headers = $self->headers->parse(delete $self->{pre_buffer}); return unless $headers->is_finished; $self->{state} = 'body'; # Take care of leftovers my $leftovers = $self->{pre_buffer} = $headers->leftovers; $self->{header_size} = $self->{raw_size} - length $leftovers; } sub _parse_until_body { my ($self, $chunk) = @_; $self->{raw_size} += length($chunk //= ''); $self->{pre_buffer} .= $chunk; $self->_parse_headers if ($self->{state} ||= 'headers') eq 'headers'; $self->emit('body') if $self->{state} ne 'headers' && !$self->{body}++; } 1; =encoding utf8 =head1 NAME Mojo::Content - HTTP content base class =head1 SYNOPSIS package Mojo::Content::MyContent; use Mojo::Base 'Mojo::Content'; sub body_contains {...} sub body_size {...} sub get_body_chunk {...} =head1 DESCRIPTION L is an abstract base class for HTTP content containers, based on L and L, like L and L. =head1 EVENTS L inherits all events from L and can emit the following new ones. =head2 body $content->on(body => sub { my $content = shift; ... }); Emitted once all headers have been parsed and the body starts. $content->on(body => sub { my $content = shift; $content->auto_upgrade(0) if $content->headers->header('X-No-MultiPart'); }); =head2 drain $content->on(drain => sub { my ($content, $offset) = @_; ... }); Emitted once all data has been written. $content->on(drain => sub { my $content = shift; $content->write_chunk(time); }); =head2 read $content->on(read => sub { my ($content, $bytes) = @_; ... }); Emitted when a new chunk of content arrives. $content->on(read => sub { my ($content, $bytes) = @_; say "Streaming: $bytes"; }); =head1 ATTRIBUTES L implements the following attributes. =head2 auto_decompress my $bool = $content->auto_decompress; $content = $content->auto_decompress($bool); Decompress content automatically if L is true. =head2 auto_relax my $bool = $content->auto_relax; $content = $content->auto_relax($bool); Try to detect when relaxed parsing is necessary. =head2 headers my $headers = $content->headers; $content = $content->headers(Mojo::Headers->new); Content headers, defaults to a L object. =head2 max_buffer_size my $size = $content->max_buffer_size; $content = $content->max_buffer_size(1024); Maximum size in bytes of buffer for content parser, defaults to the value of the C environment variable or C<262144> (256KiB). =head2 max_leftover_size my $size = $content->max_leftover_size; $content = $content->max_leftover_size(1024); Maximum size in bytes of buffer for pipelined HTTP requests, defaults to the value of the C environment variable or C<262144> (256KiB). =head2 relaxed my $bool = $content->relaxed; $content = $content->relaxed($bool); Activate relaxed parsing for responses that are terminated with a connection close. =head2 skip_body my $bool = $content->skip_body; $content = $content->skip_body($bool); Skip body parsing and finish after headers. =head1 METHODS L inherits all methods from L and implements the following new ones. =head2 body_contains my $bool = $content->body_contains('foo bar baz'); Check if content contains a specific string. Meant to be overloaded in a subclass. =head2 body_size my $size = $content->body_size; Content size in bytes. Meant to be overloaded in a subclass. =head2 boundary my $boundary = $content->boundary; Extract multipart boundary from C header. =head2 charset my $charset = $content->charset; Extract charset from C header. =head2 clone my $clone = $content->clone; Return a new L object cloned from this content if possible, otherwise return C. =head2 generate_body_chunk my $bytes = $content->generate_body_chunk(0); Generate dynamic content. =head2 get_body_chunk my $bytes = $content->get_body_chunk(0); Get a chunk of content starting from a specific position. Meant to be overloaded in a subclass. =head2 get_header_chunk my $bytes = $content->get_header_chunk(13); Get a chunk of the headers starting from a specific position. Note that this method finalizes the content. =head2 header_size my $size = $content->header_size; Size of headers in bytes. Note that this method finalizes the content. =head2 headers_contain my $bool = $content->headers_contain('foo bar baz'); Check if headers contain a specific string. Note that this method finalizes the content. =head2 is_chunked my $bool = $content->is_chunked; Check if C header indicates chunked transfer encoding. =head2 is_compressed my $bool = $content->is_compressed; Check C header for C value. =head2 is_dynamic my $bool = $content->is_dynamic; Check if content will be dynamically generated, which prevents L from working. =head2 is_finished my $bool = $content->is_finished; Check if parser is finished. =head2 is_limit_exceeded my $bool = $content->is_limit_exceeded; Check if buffer has exceeded L. =head2 is_multipart my $bool = $content->is_multipart; False, this is not a L object. =head2 is_parsing_body my $bool = $content->is_parsing_body; Check if body parsing started yet. =head2 leftovers my $bytes = $content->leftovers; Get leftover data from content parser. =head2 parse $content = $content->parse("Content-Length: 12\x0d\x0a\x0d\x0aHello World!"); Parse content chunk. =head2 parse_body $content = $content->parse_body('Hi!'); Parse body chunk and skip headers. =head2 progress my $size = $content->progress; Size of content already received from message in bytes. =head2 write $content = $content->write; $content = $content->write(''); $content = $content->write($bytes); $content = $content->write($bytes => sub {...}); Write dynamic content non-blocking, the optional drain callback will be executed once all data has been written. Calling this method without a chunk of data will finalize the L and allow for dynamic content to be written later. You can write an empty chunk of data at any time to end the stream. # Make sure previous chunk of data has been written before continuing $content->write('He' => sub { my $content = shift; $content->write('llo!' => sub { my $content = shift; $content->write(''); }); }); =head2 write_chunk $content = $content->write_chunk; $content = $content->write_chunk(''); $content = $content->write_chunk($bytes); $content = $content->write_chunk($bytes => sub {...}); Write dynamic content non-blocking with chunked transfer encoding, the optional drain callback will be executed once all data has been written. Calling this method without a chunk of data will finalize the L and allow for dynamic content to be written later. You can write an empty chunk of data at any time to end the stream. # Make sure previous chunk of data has been written before continuing $content->write_chunk('He' => sub { my $content = shift; $content->write_chunk('llo!' => sub { my $content = shift; $content->write_chunk(''); }); }); =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Content/MultiPart.pm b/modules/Mojo/Content/MultiPart.pm index 419bfc2..e2dff10 100644 --- a/modules/Mojo/Content/MultiPart.pm +++ b/modules/Mojo/Content/MultiPart.pm @@ -1,314 +1,314 @@ package Mojo::Content::MultiPart; use Mojo::Base 'Mojo::Content'; -use Mojo::Util 'b64_encode'; +use Mojo::Util qw(b64_encode); has parts => sub { [] }; sub body_contains { my ($self, $chunk) = @_; ($_->headers_contain($chunk) or $_->body_contains($chunk)) and return 1 for @{$self->parts}; return undef; } sub body_size { my $self = shift; # Check for existing Content-Length header if (my $len = $self->headers->content_length) { return $len } # Calculate length of whole body my $len = my $boundary_len = length($self->build_boundary) + 6; $len += $_->header_size + $_->body_size + $boundary_len for @{$self->parts}; return $len; } sub build_boundary { my $self = shift; # Check for existing boundary my $boundary; return $boundary if defined($boundary = $self->boundary); # Generate and check boundary my $size = 1; do { $boundary = b64_encode join('', map chr(rand 256), 1 .. $size++ * 3); $boundary =~ s/\W/X/g; } while $self->body_contains($boundary); # Add boundary to Content-Type header my $headers = $self->headers; my ($before, $after) = ('multipart/mixed', ''); ($before, $after) = ($1, $2) if ($headers->content_type // '') =~ m!^(.*multipart/[^;]+)(.*)$!; $headers->content_type("$before; boundary=$boundary$after"); return $boundary; } sub clone { my $self = shift; return undef unless my $clone = $self->SUPER::clone(); return $clone->parts($self->parts); } sub get_body_chunk { my ($self, $offset) = @_; # Body generator return $self->generate_body_chunk($offset) if $self->is_dynamic; # First boundary my $boundary = $self->{boundary} //= $self->build_boundary; my $boundary_len = length($boundary) + 6; my $len = $boundary_len - 2; return substr "--$boundary\x0d\x0a", $offset if $len > $offset; # Skip parts that have already been processed my $start = 0; ($len, $start) = ($self->{last_len}, $self->{last_part} + 1) if $self->{offset} && $offset > $self->{offset}; # Prepare content part by part my $parts = $self->parts; for (my $i = $start; $i < @$parts; $i++) { my $part = $parts->[$i]; # Headers my $header_len = $part->header_size; return $part->get_header_chunk($offset - $len) if ($len + $header_len) > $offset; $len += $header_len; # Content my $content_len = $part->body_size; return $part->get_body_chunk($offset - $len) if ($len + $content_len) > $offset; $len += $content_len; # Boundary if ($#$parts == $i) { $boundary .= '--'; $boundary_len += 2; } return substr "\x0d\x0a--$boundary\x0d\x0a", $offset - $len if ($len + $boundary_len) > $offset; $len += $boundary_len; @{$self}{qw(last_len last_part offset)} = ($len, $i, $offset); } } sub is_multipart {1} sub new { my $self = shift->SUPER::new(@_); $self->on(read => \&_read); return $self; } sub _parse_multipart_body { my ($self, $boundary) = @_; # Whole part in buffer my $pos = index $self->{multipart}, "\x0d\x0a--$boundary"; if ($pos < 0) { my $len = length($self->{multipart}) - (length($boundary) + 8); return undef unless $len > 0; # Store chunk my $chunk = substr $self->{multipart}, 0, $len, ''; $self->parts->[-1] = $self->parts->[-1]->parse($chunk); return undef; } # Store chunk my $chunk = substr $self->{multipart}, 0, $pos, ''; $self->parts->[-1] = $self->parts->[-1]->parse($chunk); return !!($self->{multi_state} = 'multipart_boundary'); } sub _parse_multipart_boundary { my ($self, $boundary) = @_; # Boundary begins if ((index $self->{multipart}, "\x0d\x0a--$boundary\x0d\x0a") == 0) { substr $self->{multipart}, 0, length($boundary) + 6, ''; # New part my $part = Mojo::Content::Single->new(relaxed => 1); $self->emit(part => $part); push @{$self->parts}, $part; return !!($self->{multi_state} = 'multipart_body'); } # Boundary ends my $end = "\x0d\x0a--$boundary--"; if ((index $self->{multipart}, $end) == 0) { substr $self->{multipart}, 0, length $end, ''; $self->{multi_state} = 'finished'; } return undef; } sub _parse_multipart_preamble { my ($self, $boundary) = @_; # No boundary yet return undef if (my $pos = index $self->{multipart}, "--$boundary") < 0; # Replace preamble with carriage return and line feed substr $self->{multipart}, 0, $pos, "\x0d\x0a"; # Parse boundary return !!($self->{multi_state} = 'multipart_boundary'); } sub _read { my ($self, $chunk) = @_; $self->{multipart} .= $chunk; my $boundary = $self->boundary; until (($self->{multi_state} //= 'multipart_preamble') eq 'finished') { # Preamble if ($self->{multi_state} eq 'multipart_preamble') { last unless $self->_parse_multipart_preamble($boundary); } # Boundary elsif ($self->{multi_state} eq 'multipart_boundary') { last unless $self->_parse_multipart_boundary($boundary); } # Body elsif ($self->{multi_state} eq 'multipart_body') { last unless $self->_parse_multipart_body($boundary); } } # Check buffer size @$self{qw(state limit)} = ('finished', 1) if length($self->{multipart} // '') > $self->max_buffer_size; } 1; =encoding utf8 =head1 NAME Mojo::Content::MultiPart - HTTP multipart content =head1 SYNOPSIS use Mojo::Content::MultiPart; my $multi = Mojo::Content::MultiPart->new; $multi->parse('Content-Type: multipart/mixed; boundary=---foobar'); my $single = $multi->parts->[4]; =head1 DESCRIPTION L is a container for HTTP multipart content, based on L, L and L. =head1 EVENTS L inherits all events from L and can emit the following new ones. =head2 part $multi->on(part => sub { my ($multi, $single) = @_; ... }); Emitted when a new L part starts. $multi->on(part => sub { my ($multi, $single) = @_; return unless $single->headers->content_disposition =~ /name="([^"]+)"/; say "Field: $1"; }); =head1 ATTRIBUTES L inherits all attributes from L and implements the following new ones. =head2 parts my $parts = $multi->parts; $multi = $multi->parts([Mojo::Content::Single->new]); Content parts embedded in this multipart content, usually L objects. =head1 METHODS L inherits all methods from L and implements the following new ones. =head2 body_contains my $bool = $multi->body_contains('foobarbaz'); Check if content parts contain a specific string. =head2 body_size my $size = $multi->body_size; Content size in bytes. =head2 build_boundary my $boundary = $multi->build_boundary; Generate a suitable boundary for content and add it to C header. =head2 clone my $clone = $multi->clone; Return a new L object cloned from this content if possible, otherwise return C. =head2 get_body_chunk my $bytes = $multi->get_body_chunk(0); Get a chunk of content starting from a specific position. Note that it might not be possible to get the same chunk twice if content was generated dynamically. =head2 is_multipart my $bool = $multi->is_multipart; True, this is a L object. =head2 new my $multi = Mojo::Content::MultiPart->new; my $multi = Mojo::Content::MultiPart->new(parts => [Mojo::Content::Single->new]); my $multi = Mojo::Content::MultiPart->new({parts => [Mojo::Content::Single->new]}); Construct a new L object and subscribe to event L with default content parser. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/Cookie.pm b/modules/Mojo/Cookie.pm index c055b98..732f7d9 100644 --- a/modules/Mojo/Cookie.pm +++ b/modules/Mojo/Cookie.pm @@ -1,89 +1,89 @@ package Mojo::Cookie; use Mojo::Base -base; use overload bool => sub {1}, '""' => sub { shift->to_string }, fallback => 1; -use Carp 'croak'; +use Carp qw(croak); has [qw(name value)]; sub parse { croak 'Method "parse" not implemented by subclass' } sub to_string { croak 'Method "to_string" not implemented by subclass' } 1; =encoding utf8 =head1 NAME Mojo::Cookie - HTTP cookie base class =head1 SYNOPSIS package Mojo::Cookie::MyCookie; use Mojo::Base 'Mojo::Cookie'; sub parse {...} sub to_string {...} =head1 DESCRIPTION L is an abstract base class for HTTP cookie containers, based on L, like L and L. =head1 ATTRIBUTES L implements the following attributes. =head2 name my $name = $cookie->name; $cookie = $cookie->name('foo'); Cookie name. =head2 value my $value = $cookie->value; $cookie = $cookie->value('/test'); Cookie value. =head1 METHODS L inherits all methods from L and implements the following new ones. =head2 parse my $cookies = $cookie->parse($str); Parse cookies. Meant to be overloaded in a subclass. =head2 to_string my $str = $cookie->to_string; Render cookie. Meant to be overloaded in a subclass. =head1 OPERATORS L overloads the following operators. =head2 bool my $bool = !!$cookie; Always true. =head2 stringify my $str = "$cookie"; Alias for L. =head1 SEE ALSO L, L, L. =cut diff --git a/modules/Mojo/DOM.pm b/modules/Mojo/DOM.pm index 3e74728..453fea6 100644 --- a/modules/Mojo/DOM.pm +++ b/modules/Mojo/DOM.pm @@ -1,1137 +1,1137 @@ package Mojo::DOM; use Mojo::Base -strict; use overload '@{}' => sub { shift->child_nodes }, '%{}' => sub { shift->attr }, bool => sub {1}, '""' => sub { shift->to_string }, fallback => 1; # "Fry: This snow is beautiful. I'm glad global warming never happened. # Leela: Actually, it did. But thank God nuclear winter canceled it out." use Mojo::Collection; use Mojo::DOM::CSS; use Mojo::DOM::HTML; use Scalar::Util qw(blessed weaken); -use Storable 'dclone'; +use Storable qw(dclone); sub all_text { _text(_nodes(shift->tree), 1) } sub ancestors { _select($_[0]->_collect([_ancestors($_[0]->tree)]), $_[1]) } -sub append { shift->_add(1, @_) } +sub append { shift->_add(1, @_) } sub append_content { shift->_content(1, 0, @_) } sub at { my $self = shift; return undef unless my $result = $self->_css->select_one(@_); return $self->_build($result, $self->xml); } sub attr { my $self = shift; # Hash my $tree = $self->tree; my $attrs = $tree->[0] ne 'tag' ? {} : $tree->[2]; return $attrs unless @_; # Get return $attrs->{$_[0]} unless @_ > 1 || ref $_[0]; # Set my $values = ref $_[0] ? $_[0] : {@_}; @$attrs{keys %$values} = values %$values; return $self; } sub child_nodes { $_[0]->_collect(_nodes($_[0]->tree)) } sub children { _select($_[0]->_collect(_nodes($_[0]->tree, 1)), $_[1]) } sub content { my $self = shift; my $type = $self->type; if ($type eq 'root' || $type eq 'tag') { return $self->_content(0, 1, @_) if @_; my $html = Mojo::DOM::HTML->new(xml => $self->xml); return join '', map { $html->tree($_)->render } @{_nodes($self->tree)}; } return $self->tree->[1] unless @_; $self->tree->[1] = shift; return $self; } sub descendant_nodes { $_[0]->_collect(_all(_nodes($_[0]->tree))) } sub find { my $self = shift; return $self->_collect($self->_css->select(@_)); } sub following { _select($_[0]->_collect(_siblings($_[0]->tree, 1, 1)), $_[1]) } sub following_nodes { $_[0]->_collect(_siblings($_[0]->tree, 0, 1)) } sub matches { shift->_css->matches(@_) } sub namespace { my $self = shift; return undef if (my $tree = $self->tree)->[0] ne 'tag'; # Extract namespace prefix and search parents my $ns = $tree->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef; for my $node ($tree, _ancestors($tree)) { # Namespace for prefix my $attrs = $node->[2]; if ($ns) { $_ eq $ns and return $attrs->{$_} for keys %$attrs } # Namespace attribute elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} } } return undef; } sub new { my $class = shift; my $self = bless \Mojo::DOM::HTML->new, ref $class || $class; return @_ ? $self->parse(@_) : $self; } sub new_tag { my $self = shift; my $new = $self->new; $$new->tag(@_); $$new->xml($$self->xml) if ref $self; return $new; } sub next { $_[0]->_maybe(_siblings($_[0]->tree, 1, 1, 0)) } sub next_node { $_[0]->_maybe(_siblings($_[0]->tree, 0, 1, 0)) } sub parent { my $self = shift; return undef if (my $tree = $self->tree)->[0] eq 'root'; return $self->_build(_parent($tree), $self->xml); } sub parse { ${$_[0]}->parse($_[1]) and return $_[0] } sub preceding { _select($_[0]->_collect(_siblings($_[0]->tree, 1, 0)), $_[1]) } sub preceding_nodes { $_[0]->_collect(_siblings($_[0]->tree, 0)) } -sub prepend { shift->_add(0, @_) } +sub prepend { shift->_add(0, @_) } sub prepend_content { shift->_content(0, 0, @_) } sub previous { $_[0]->_maybe(_siblings($_[0]->tree, 1, 0, -1)) } sub previous_node { $_[0]->_maybe(_siblings($_[0]->tree, 0, 0, -1)) } sub remove { shift->replace('') } sub replace { my ($self, $new) = @_; return $self->parse($new) if (my $tree = $self->tree)->[0] eq 'root'; return $self->_replace(_parent($tree), $tree, _nodes($self->_parse($new))); } sub root { my $self = shift; return $self unless my $tree = _ancestors($self->tree, 1); return $self->_build($tree, $self->xml); } sub selector { return undef unless (my $tree = shift->tree)->[0] eq 'tag'; return join ' > ', reverse map { $_->[1] . ':nth-child(' . (@{_siblings($_, 1)} + 1) . ')' } $tree, _ancestors($tree); } sub strip { my $self = shift; return $self if (my $tree = $self->tree)->[0] ne 'tag'; return $self->_replace($tree->[3], $tree, _nodes($tree)); } sub tag { my ($self, $tag) = @_; return undef if (my $tree = $self->tree)->[0] ne 'tag'; return $tree->[1] unless $tag; $tree->[1] = $tag; return $self; } sub tap { shift->Mojo::Base::tap(@_) } sub text { _text(_nodes(shift->tree), 0) } sub to_string { ${shift()}->render } sub tree { @_ > 1 ? (${$_[0]}->tree($_[1]) and return $_[0]) : ${$_[0]}->tree } sub type { shift->tree->[0] } sub val { my $self = shift; # "option" return $self->{value} // $self->text if (my $tag = $self->tag) eq 'option'; # "input" ("type=checkbox" and "type=radio") my $type = $self->{type} // ''; return $self->{value} // 'on' if $tag eq 'input' && ($type eq 'radio' || $type eq 'checkbox'); # "textarea", "input" or "button" return $tag eq 'textarea' ? $self->text : $self->{value} if $tag ne 'select'; # "select" my $v = $self->find('option:checked:not([disabled])') ->grep(sub { !$_->ancestors('optgroup[disabled]')->size })->map('val'); return exists $self->{multiple} ? $v->size ? $v->to_array : undef : $v->last; } sub with_roles { shift->Mojo::Base::with_roles(@_) } sub wrap { shift->_wrap(0, @_) } sub wrap_content { shift->_wrap(1, @_) } sub xml { @_ > 1 ? (${$_[0]}->xml($_[1]) and return $_[0]) : ${$_[0]}->xml } sub _add { my ($self, $offset, $new) = @_; return $self if (my $tree = $self->tree)->[0] eq 'root'; my $parent = _parent($tree); splice @$parent, _offset($parent, $tree) + $offset, 0, @{_link($parent, _nodes($self->_parse($new)))}; return $self; } sub _all { my $nodes = shift; @$nodes = map { $_->[0] eq 'tag' ? ($_, @{_all(_nodes($_))}) : ($_) } @$nodes; return $nodes; } sub _ancestors { my ($tree, $root) = @_; return () unless $tree = _parent($tree); my @ancestors; do { push @ancestors, $tree } while ($tree->[0] eq 'tag') && ($tree = $tree->[3]); return $root ? $ancestors[-1] : @ancestors[0 .. $#ancestors - 1]; } sub _build { shift->new->tree(shift)->xml(shift) } sub _collect { my ($self, $nodes) = (shift, shift // []); my $xml = $self->xml; return Mojo::Collection->new(map { $self->_build($_, $xml) } @$nodes); } sub _content { my ($self, $start, $offset, $new) = @_; my $tree = $self->tree; unless ($tree->[0] eq 'root' || $tree->[0] eq 'tag') { my $old = $self->content; return $self->content($start ? $old . $new : $new . $old); } $start = $start ? ($#$tree + 1) : _start($tree); $offset = $offset ? $#$tree : 0; splice @$tree, $start, $offset, @{_link($tree, _nodes($self->_parse($new)))}; return $self; } sub _css { Mojo::DOM::CSS->new(tree => shift->tree) } sub _fragment { _link(my $r = ['root', @_], [@_]); $r } sub _link { my ($parent, $children) = @_; # Link parent to children for my $node (@$children) { my $offset = $node->[0] eq 'tag' ? 3 : 2; $node->[$offset] = $parent; weaken $node->[$offset]; } return $children; } sub _maybe { $_[1] ? $_[0]->_build($_[1], $_[0]->xml) : undef } sub _nodes { return () unless my $tree = shift; my @nodes = @$tree[_start($tree) .. $#$tree]; return shift() ? [grep { $_->[0] eq 'tag' } @nodes] : \@nodes; } sub _offset { my ($parent, $child) = @_; my $i = _start($parent); $_ eq $child ? last : $i++ for @$parent[$i .. $#$parent]; return $i; } sub _parent { $_[0]->[$_[0][0] eq 'tag' ? 3 : 2] } sub _parse { my ($self, $input) = @_; return Mojo::DOM::HTML->new(xml => $self->xml)->parse($input)->tree unless blessed $input && $input->isa('Mojo::DOM'); my $tree = dclone $input->tree; return $tree->[0] eq 'root' ? $tree : _fragment($tree); } sub _replace { my ($self, $parent, $child, $nodes) = @_; splice @$parent, _offset($parent, $child), 1, @{_link($parent, $nodes)}; return $self->parent; } sub _select { $_[1] ? $_[0]->grep(matches => $_[1]) : $_[0] } sub _siblings { my ($tree, $tags, $tail, $i) = @_; return defined $i ? undef : [] if $tree->[0] eq 'root'; my $nodes = _nodes(_parent($tree)); my $match = -1; defined($match++) and $_ eq $tree and last for @$nodes; if ($tail) { splice @$nodes, 0, $match + 1 } else { splice @$nodes, $match, ($#$nodes + 1) - $match } @$nodes = grep { $_->[0] eq 'tag' } @$nodes if $tags; return defined $i ? $i == -1 && !@$nodes ? undef : $nodes->[$i] : $nodes; } sub _start { $_[0][0] eq 'root' ? 1 : 4 } sub _text { my ($nodes, $all) = @_; my $text = ''; while (my $node = shift @$nodes) { my $type = $node->[0]; # Text if ($type eq 'text' || $type eq 'cdata' || $type eq 'raw') { $text .= $node->[1]; } # Nested tag elsif ($type eq 'tag' && $all) { unshift @$nodes, @{_nodes($node)} } } return $text; } sub _wrap { my ($self, $content, $new) = @_; return $self if (my $tree = $self->tree)->[0] eq 'root' && !$content; return $self if $tree->[0] ne 'root' && $tree->[0] ne 'tag' && $content; # Find innermost tag my $current; my $first = $new = $self->_parse($new); $current = $first while $first = _nodes($first, 1)->[0]; return $self unless $current; # Wrap content if ($content) { push @$current, @{_link($current, _nodes($tree))}; splice @$tree, _start($tree), $#$tree, @{_link($tree, _nodes($new))}; return $self; } # Wrap element $self->_replace(_parent($tree), $tree, _nodes($new)); push @$current, @{_link($current, [$tree])}; return $self; } 1; =encoding utf8 =head1 NAME Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors =head1 SYNOPSIS use Mojo::DOM; # Parse my $dom = Mojo::DOM->new('

Test

123

'); # Find say $dom->at('#b')->text; say $dom->find('p')->map('text')->join("\n"); say $dom->find('[id]')->map(attr => 'id')->join("\n"); # Iterate $dom->find('p[id]')->reverse->each(sub { say $_->{id} }); # Loop for my $e ($dom->find('p[id]')->each) { say $e->{id}, ':', $e->text; } # Modify $dom->find('div p')->last->append('

456

'); $dom->at('#c')->prepend($dom->new_tag('p', id => 'd', '789')); $dom->find(':not(p)')->map('strip'); # Render say "$dom"; =head1 DESCRIPTION L is a minimalistic and relaxed HTML/XML DOM parser with CSS selector support. It will even try to interpret broken HTML and XML, so you should not use it for validation. =head1 NODES AND ELEMENTS When we parse an HTML/XML fragment, it gets turned into a tree of nodes. Hello World! There are currently eight different kinds of nodes, C, C, C, C, C, C, C and C. Elements are nodes of the type C. root |- doctype (html) +- tag (html) |- tag (head) | +- tag (title) | +- raw (Hello) +- tag (body) +- text (World!) While all node types are represented as L objects, some methods like L and L only apply to elements. =head1 CASE-SENSITIVITY L defaults to HTML semantics, that means all tags and attribute names are lowercased and selectors need to be lowercase as well. # HTML semantics my $dom = Mojo::DOM->new('

Hi!

'); say $dom->at('p[id]')->text; If an XML declaration is found, the parser will automatically switch into XML mode and everything becomes case-sensitive. # XML semantics my $dom = Mojo::DOM->new('

Hi!

'); say $dom->at('P[ID]')->text; HTML or XML semantics can also be forced with the L method. # Force HTML semantics my $dom = Mojo::DOM->new->xml(0)->parse('

Hi!

'); say $dom->at('p[id]')->text; # Force XML semantics my $dom = Mojo::DOM->new->xml(1)->parse('

Hi!

'); say $dom->at('P[ID]')->text; =head1 METHODS L implements the following methods. =head2 all_text my $text = $dom->all_text; Extract text content from all descendant nodes of this element. # "foo\nbarbaz\n" $dom->parse("
foo\n

bar

baz\n
")->at('div')->all_text; =head2 ancestors my $collection = $dom->ancestors; my $collection = $dom->ancestors('div ~ p'); Find all ancestor elements of this node matching the CSS selector and return a L object containing these elements as L objects. All selectors from L are supported. # List tag names of ancestor elements say $dom->ancestors->map('tag')->join("\n"); =head2 append $dom = $dom->append('

I ♥ Mojolicious!

'); $dom = $dom->append(Mojo::DOM->new); Append HTML/XML fragment to this node (for all node types other than C). # "

Test

123

" $dom->parse('

Test

') ->at('h1')->append('

123

')->root; # "

Test 123

" $dom->parse('

Test

')->at('p') ->child_nodes->first->append(' 123')->root; =head2 append_content $dom = $dom->append_content('

I ♥ Mojolicious!

'); $dom = $dom->append_content(Mojo::DOM->new); Append HTML/XML fragment (for C and C nodes) or raw content to this node's content. # "

Test123

" $dom->parse('

Test

') ->at('h1')->append_content('123')->root; # "
" $dom->parse('
') ->child_nodes->first->append_content('123 ')->root; # "

Test123

" $dom->parse('

Test

')->at('p')->append_content('123')->root; =head2 at my $result = $dom->at('div ~ p'); my $result = $dom->at('svg|line', svg => 'http://www.w3.org/2000/svg'); Find first descendant element of this element matching the CSS selector and return it as a L object, or C if none could be found. All selectors from L are supported. # Find first element with "svg" namespace definition my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'}; Trailing key/value pairs can be used to declare xml namespace aliases. # "" $dom->parse('') ->at('svg|rect', svg => 'http://www.w3.org/2000/svg'); =head2 attr my $hash = $dom->attr; my $foo = $dom->attr('foo'); $dom = $dom->attr({foo => 'bar'}); $dom = $dom->attr(foo => 'bar'); This element's attributes. # Remove an attribute delete $dom->attr->{id}; # Attribute without value $dom->attr(selected => undef); # List id attributes say $dom->find('*')->map(attr => 'id')->compact->join("\n"); =head2 child_nodes my $collection = $dom->child_nodes; Return a L object containing all child nodes of this element as L objects. # "

123

" $dom->parse('

Test123

')->at('p')->child_nodes->first->remove; # "" $dom->parse('123')->child_nodes->first; # " Test " $dom->parse('123')->child_nodes->last->content; =head2 children my $collection = $dom->children; my $collection = $dom->children('div ~ p'); Find all child elements of this element matching the CSS selector and return a L object containing these elements as L objects. All selectors from L are supported. # Show tag name of random child element say $dom->children->shuffle->first->tag; =head2 content my $str = $dom->content; $dom = $dom->content('

I ♥ Mojolicious!

'); $dom = $dom->content(Mojo::DOM->new); Return this node's content or replace it with HTML/XML fragment (for C and C nodes) or raw content. # "Test" $dom->parse('
Test
')->at('div')->content; # "

123

" $dom->parse('

Test

')->at('h1')->content('123')->root; # "

123

" $dom->parse('

Test

')->at('p')->content('123')->root; # "

" $dom->parse('

Test

')->at('h1')->content('')->root; # " Test " $dom->parse('
')->child_nodes->first->content; # "
456
" $dom->parse('
456
') ->at('div')->child_nodes->first->content(' 123 ')->root; =head2 descendant_nodes my $collection = $dom->descendant_nodes; Return a L object containing all descendant nodes of this element as L objects. # "

123

" $dom->parse('

123

') ->descendant_nodes->grep(sub { $_->type eq 'comment' }) ->map('remove')->first; # "

testtest

" $dom->parse('

123456

') ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' }) ->map(content => 'test')->first->root; =head2 find my $collection = $dom->find('div ~ p'); my $collection = $dom->find('svg|line', svg => 'http://www.w3.org/2000/svg'); Find all descendant elements of this element matching the CSS selector and return a L object containing these elements as L objects. All selectors from L are supported. # Find a specific element and extract information my $id = $dom->find('div')->[23]{id}; # Extract information from multiple elements my @headers = $dom->find('h1, h2, h3')->map('text')->each; # Count all the different tags my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {}); # Find elements with a class that contains dots my @divs = $dom->find('div.foo\.bar')->each; Trailing key/value pairs can be used to declare xml namespace aliases. # "" $dom->parse('') ->find('svg|rect', svg => 'http://www.w3.org/2000/svg')->first; =head2 following my $collection = $dom->following; my $collection = $dom->following('div ~ p'); Find all sibling elements after this node matching the CSS selector and return a L object containing these elements as L objects. All selectors from L are supported. # List tags of sibling elements after this node say $dom->following->map('tag')->join("\n"); =head2 following_nodes my $collection = $dom->following_nodes; Return a L object containing all sibling nodes after this node as L objects. # "C" $dom->parse('

A

C')->at('p')->following_nodes->last->content; =head2 matches my $bool = $dom->matches('div ~ p'); my $bool = $dom->matches('svg|line', svg => 'http://www.w3.org/2000/svg'); Check if this element matches the CSS selector. All selectors from L are supported. # True $dom->parse('

A

')->at('p')->matches('.a'); $dom->parse('

A

')->at('p')->matches('p[class]'); # False $dom->parse('

A

')->at('p')->matches('.b'); $dom->parse('

A

')->at('p')->matches('p[id]'); Trailing key/value pairs can be used to declare xml namespace aliases. # True $dom->parse('') ->matches('svg|rect', svg => 'http://www.w3.org/2000/svg'); =head2 namespace my $namespace = $dom->namespace; Find this element's namespace, or return C if none could be found. # Find namespace for an element with namespace prefix my $namespace = $dom->at('svg > svg\:circle')->namespace; # Find namespace for an element that may or may not have a namespace prefix my $namespace = $dom->at('svg > circle')->namespace; =head2 new my $dom = Mojo::DOM->new; my $dom = Mojo::DOM->new('I ♥ Mojolicious!'); Construct a new scalar-based L object and L HTML/XML fragment if necessary. =head2 new_tag my $tag = Mojo::DOM->new_tag('div'); my $tag = $dom->new_tag('div'); my $tag = $dom->new_tag('div', id => 'foo', hidden => undef); my $tag = $dom->new_tag('div', 'safe content'); my $tag = $dom->new_tag('div', id => 'foo', 'safe content'); my $tag = $dom->new_tag('div', data => {mojo => 'rocks'}, 'safe content'); my $tag = $dom->new_tag('div', id => 'foo', sub { 'unsafe content' }); Construct a new L object for an HTML/XML tag with or without attributes and content. The C attribute may contain a hash reference with key/value pairs to generate attributes from. # "
" $dom->new_tag('br'); # "
" $dom->new_tag('div'); # "" $dom->new_tag('div', id => 'foo', hidden => undef); # "
test & 123
" $dom->new_tag('div', 'test & 123'); # "
test & 123
" $dom->new_tag('div', id => 'foo', 'test & 123'); # "
test & 123
"" $dom->new_tag('div', data => {foo => 1, Bar => 'test'}, 'test & 123'); # "
test & 123
" $dom->new_tag('div', id => 'foo', sub { 'test & 123' }); # "
HelloMojo!
" $dom->parse('
Hello
')->at('div') ->append_content($dom->new_tag('b', 'Mojo!'))->root; =head2 next my $sibling = $dom->next; Return L object for next sibling element, or C if there are no more siblings. # "

123

" $dom->parse('

Test

123

')->at('h1')->next; =head2 next_node my $sibling = $dom->next_node; Return L object for next sibling node, or C if there are no more siblings. # "456" $dom->parse('

123456

') ->at('b')->next_node->next_node; # " Test " $dom->parse('

123456

') ->at('b')->next_node->content; =head2 parent my $parent = $dom->parent; Return L object for parent of this node, or C if this node has no parent. # "Test" $dom->parse('

Test

')->at('i')->parent; =head2 parse $dom = $dom->parse('I ♥ Mojolicious!'); Parse HTML/XML fragment with L. # Parse XML my $dom = Mojo::DOM->new->xml(1)->parse('I ♥ Mojolicious!'); =head2 preceding my $collection = $dom->preceding; my $collection = $dom->preceding('div ~ p'); Find all sibling elements before this node matching the CSS selector and return a L object containing these elements as L objects. All selectors from L are supported. # List tags of sibling elements before this node say $dom->preceding->map('tag')->join("\n"); =head2 preceding_nodes my $collection = $dom->preceding_nodes; Return a L object containing all sibling nodes before this node as L objects. # "A" $dom->parse('A

C

')->at('p')->preceding_nodes->first->content; =head2 prepend $dom = $dom->prepend('

I ♥ Mojolicious!

'); $dom = $dom->prepend(Mojo::DOM->new); Prepend HTML/XML fragment to this node (for all node types other than C). # "

Test

123

" $dom->parse('

123

') ->at('h2')->prepend('

Test

')->root; # "

Test 123

" $dom->parse('

123

') ->at('p')->child_nodes->first->prepend('Test ')->root; =head2 prepend_content $dom = $dom->prepend_content('

I ♥ Mojolicious!

'); $dom = $dom->prepend_content(Mojo::DOM->new); Prepend HTML/XML fragment (for C and C nodes) or raw content to this node's content. # "

Test123

" $dom->parse('

123

') ->at('h2')->prepend_content('Test')->root; # "
" $dom->parse('
') ->child_nodes->first->prepend_content(' Test')->root; # "

123Test

" $dom->parse('

Test

')->at('p')->prepend_content('123')->root; =head2 previous my $sibling = $dom->previous; Return L object for previous sibling element, or C if there are no more siblings. # "

Test

" $dom->parse('

Test

123

')->at('h2')->previous; =head2 previous_node my $sibling = $dom->previous_node; Return L object for previous sibling node, or C if there are no more siblings. # "123" $dom->parse('

123456

') ->at('b')->previous_node->previous_node; # " Test " $dom->parse('

123456

') ->at('b')->previous_node->content; =head2 remove my $parent = $dom->remove; Remove this node and return L (for C nodes) or L. # "
" $dom->parse('

Test

')->at('h1')->remove; # "

456

" $dom->parse('

123456

') ->at('p')->child_nodes->first->remove->root; =head2 replace my $parent = $dom->replace('
I ♥ Mojolicious!
'); my $parent = $dom->replace(Mojo::DOM->new); Replace this node with HTML/XML fragment and return L (for C nodes) or L. # "

123

" $dom->parse('

Test

')->at('h1')->replace('

123

'); # "

123

" $dom->parse('

Test

') ->at('p')->child_nodes->[0]->replace('123')->root; =head2 root my $root = $dom->root; Return L object for C node. =head2 selector my $selector = $dom->selector; Get a unique CSS selector for this element. # "ul:nth-child(1) > li:nth-child(2)" $dom->parse('
  • Test
  • 123
')->find('li')->last->selector; # "p:nth-child(1) > b:nth-child(1) > i:nth-child(1)" $dom->parse('

Test

')->at('i')->selector; =head2 strip my $parent = $dom->strip; Remove this element while preserving its content and return L. # "
Test
" $dom->parse('

Test

')->at('h1')->strip; =head2 tag my $tag = $dom->tag; $dom = $dom->tag('div'); This element's tag name. # List tag names of child elements say $dom->children->map('tag')->join("\n"); =head2 tap $dom = $dom->tap(sub {...}); Alias for L. =head2 text my $text = $dom->text; Extract text content from this element only (not including child elements). # "bar" $dom->parse("
foo

bar

baz
")->at('p')->text; # "foo\nbaz\n" $dom->parse("
foo\n

bar

baz\n
")->at('div')->text; =head2 to_string my $str = $dom->to_string; Render this node and its content to HTML/XML. # "Test" $dom->parse('
Test
')->at('div b')->to_string; =head2 tree my $tree = $dom->tree; $dom = $dom->tree(['root']); Document Object Model. Note that this structure should only be used very carefully since it is very dynamic. =head2 type my $type = $dom->type; This node's type, usually C, C, C, C, C, C, C or C. # "cdata" $dom->parse('')->child_nodes->first->type; # "comment" $dom->parse('')->child_nodes->first->type; # "doctype" $dom->parse('')->child_nodes->first->type; # "pi" $dom->parse('')->child_nodes->first->type; # "raw" $dom->parse('Test')->at('title')->child_nodes->first->type; # "root" $dom->parse('

Test

')->type; # "tag" $dom->parse('

Test

')->at('p')->type; # "text" $dom->parse('

Test

')->at('p')->child_nodes->first->type; =head2 val my $value = $dom->val; Extract value from form element (such as C