FU/MultipartFormData.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
—
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
              package FU::MultipartFormData 0.5;
use v5.36;
use Carp 'confess';
use FU::Util 'utf8_decode';
# Normalize a raw header parameter value: strip leading and trailing
# whitespace, then remove one pair of surrounding double quotes if the
# remainder is a non-empty quoted string. Returns the cleaned-up copy; the
# argument itself is not modified.
sub _arg($d) {
    my $clean = $d =~ s/^\s+//r;
    $clean =~ s/\s+$//;
    $clean =~ s/^"(.+)"$/$1/;
    return $clean;
}
# Parse a multipart/form-data request body into field objects.
#
#   $pkg    - class that the field objects are blessed into
#   $header - value of the request's Content-Type header
#   $data   - request body
#
# Returns an arrayref of field objects, in the order in which they appear in
# $data. Each object stores the body together with the start offset and the
# length of its own value, plus the name / filename / mime / charset that were
# found in the part headers.
#
# Throws (via confess) when the header is not a multipart/form-data header
# with a boundary, when the boundary contains characters outside \x21-\x7e,
# when a part lacks a "content-disposition: form-data" header or a name
# parameter, when data follows the end marker, or when the last part is not
# terminated by a delimiter.
sub parse($pkg, $header, $data) {
    confess "Invalid multipart header '$header'"
        if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$};
    my $boundary = _arg($1);
    confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/;
    # Turn the boundary into a byte string before matching it against the body.
    utf8::encode($boundary);

    my @a;
    # Every iteration matches one delimiter: either the closing
    # "--boundary--" marker (nothing captured) or "--boundary" followed by one
    # or more header lines and an empty line (header lines captured in $1).
    while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) {
        my $hdrs = $1;
        # This delimiter ends the value of the previous field. $-[0] is the
        # offset at which the delimiter starts; the extra 2 bytes are
        # presumably the CRLF that separates the value from the delimiter.
        $a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a;
        if (!$hdrs) {
            # Closing marker: it must be the very last thing in the body.
            confess "Trailing garbage" if pos $data != length $data;
            last;
        }
        # The value starts right after the empty line that ends the headers;
        # its length is filled in once the next delimiter has been found.
        my $d = bless {
            data => $data,
            start => pos $data,
        }, $pkg;
        confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i;
        my $v = $1;
        # Same value syntax as for "filename" below. The character class used
        # to exclude '[' as well, which cut names such as "tags[]" short and
        # left a stray leading quote in the result.
        confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^;\s]+)/;
        $d->{name} = utf8_decode(_arg($1));
        $d->{filename} = utf8_decode(_arg($1)) if $v =~ /[;\s]filename=([^;\s]+)/;
        if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) {
            $d->{mime} = utf8_decode(_arg($1));
            $d->{charset} = utf8_decode(_arg($2)) if $2;
        }
        push @a, $d;
    }
    # A field without a length means no delimiter followed its value.
    confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length};
    \@a
}
# Read-only accessors. Each one returns the attribute of the same name as it
# is stored in the field object, or undef when that attribute is not set.

# Field name.
sub name {
    my ($self) = @_;
    return $self->{name};
}

# Filename given for the field.
sub filename {
    my ($self) = @_;
    return $self->{filename};
}

# Mime type taken from the field's content-type header.
sub mime {
    my ($self) = @_;
    return $self->{mime};
}

# Charset taken from the field's content-type header.
sub charset {
    my ($self) = @_;
    return $self->{charset};
}

# Length of the field value.
sub length {
    my ($self) = @_;
    return $self->{length};
}
# Return part of the field value, like Perl's built-in substr() applied to the
# string returned by data(), but without copying the whole value first.
#
#   $off - start offset within the value; a negative offset counts from the
#          end of the value
#   $len - optional number of bytes to return; defaults to everything after
#          $off. A negative length leaves that many bytes off the end.
#
# Offsets and lengths that fall outside the value are clamped to it, so the
# result is always a (possibly empty) string.
sub substr($o,$off,$len=undef) {
    my $total = $o->{length};

    $off += $total if $off < 0;
    $off = 0 if $off < 0;
    $off = $total if $off > $total;

    # Number of bytes between $off and the end of the value.
    my $avail = $total - $off;

    $len //= $avail;
    # A negative length is relative to the end of the value, so it has to be
    # added to the bytes available after $off (not to the total length).
    $len += $avail if $len < 0;
    $len = 0 if $len < 0;
    $len = $avail if $len > $avail;

    return CORE::substr($o->{data}, $o->{start} + $off, $len);
}
# Return the complete field value, obtained through the object's substr()
# method with offset 0 and no length.
sub data {
    my ($self) = @_;
    return $self->substr(0);
}
# Return the complete field value after passing it through utf8_decode().
sub value {
    my ($self) = @_;
    return utf8_decode($self->data);
}
# Write the field value to $fh with Perl's built-in syswrite(), straight from
# the stored body. A single syswrite() call may write fewer bytes than
# requested, so keep going until nothing is left.
#
# Returns the length of the field value on success. Returns nothing (undef in
# scalar context) as soon as a write fails.
sub syswrite($o, $fh) {
    my $pos = $o->{start};
    my $remaining = $o->{length};
    while ($remaining > 0) {
        my $written = CORE::syswrite($fh, $o->{data}, $remaining, $pos);
        return unless defined $written;
        $pos += $written;
        $remaining -= $written;
    }
    return $o->{length};
}
# Save the field value to the file $fn, creating or truncating it.
#
# Throws (via confess) when the file cannot be opened, when writing fails or
# when closing the file reports an error. Returns a true value on success.
sub save($o, $fn) {
    # ':raw' keeps the handle free of I/O layers: the value is binary data and
    # is written with syswrite(), which must not go through a ':utf8' or
    # newline-translating layer that may otherwise be applied by default.
    open my $F, '>:raw', $fn or confess "Error opening '$fn': $!";
    defined $o->syswrite($F) or confess "Error writing to '$fn': $!";
    # Some write errors are only reported when the handle is closed.
    close $F or confess "Error closing '$fn': $!";
    return 1;
}
# Build a one-line, human-readable summary of the field:
#
#   NAME: [filename=...] [mime=...] [charset=...] length=N [value=...]
#
# The value preview is taken from the first 100 bytes of the value, cut at the
# first newline, with '...' appended when it was shortened. The preview is
# left out entirely when decoding those bytes fails.
sub describe($o) {
    my $preview = eval { utf8_decode($o->substr(0, 100)) };

    if (defined $preview) {
        my $newline = index $preview, "\n";
        if ($newline >= 0) {
            # Only show the first line.
            $preview = CORE::substr($preview, 0, $newline) . '...';
        }
        elsif ($o->{length} > 100) {
            # The preview does not cover the whole value.
            $preview .= '...';
        }
    }

    my @parts;
    push @parts, "filename=$o->{filename}" if $o->{filename};
    push @parts, "mime=$o->{mime}" if $o->{mime};
    push @parts, "charset=$o->{charset}" if $o->{charset};
    push @parts, "length=$o->{length}";
    push @parts, "value=$preview" if defined $preview;

    return $o->{name} . ': ' . join(' ', @parts);
}
1;
__END__
=head1 NAME
FU::MultipartFormData - Parse multipart/form-data
=head1 SYNOPSIS
  my $fields = FU::MultipartFormData->parse($content_type_header, $request_body);
  for my $f (@$fields) {
      printf "%s   %d\n", $f->name, $f->length;
      $f->save('file.png') if $f->name eq 'image';
  }
=head1 DESCRIPTION
This is a tiny module to parse an HTTP request body encoded as
C<multipart/form-data>, which is typically used to handle file uploads.
The entire request body is assumed to be in memory as a Perl string, but this
module makes an attempt to avoid any further copies of data values.
=head1 Parsing
=over
=item FU::MultipartFormData->parse($header, $body)
Returns an arrayref of field objects from the given C<$header>, which must be a
valid value for the C<Content-Type> request header, and the given C<$body>,
which must hold the request body as a byte string. An error is thrown if the
header is not valid or parsing failed.
This module is pretty lousy and does not fully conform to any HTTP standards,
but it does happen to be able to parse POST data from any browser that I've
tried.
=back
=head1 Field Object
Each field is parsed into a field object that supports the following methods:
=over
=item name
Returns the field name as a Perl Unicode string.
=item filename
Returns the filename as a Perl Unicode string, or C<undef> if no filename was
provided.
=item mime
Returns the mime type extracted from the field's C<Content-Type> header, or
C<undef> if none was present.
=item charset
Returns the charset extracted from the field's C<Content-Type> header, or
C<undef> if none was present.
=item length
Returns the byte length of the field value.
=item data
Returns a copy of the field value as a byte string. You'll want to avoid using
this on large fields.
=item value
Returns a copy of the field value as a Unicode string. Uses C<utf8_decode()>
from L<FU::Util>, so also throws an error if the value contains control
characters.
=item substr($off, $len)
Equivalent to calling C<substr()> on the string returned by C<data>, but avoids
a copy of the entire field value.
=item syswrite($fh)
Write the field value to C<$fh> using Perl's C<syswrite()>, returns C<undef> on
error or the number of bytes written on success.
Can be used to write uploaded file data to a file or send it over a socket or
pipe, without making a full in-memory copy of the data.
=item save($fn)
Save the field value to the file C<$fn>, throws an error on failure.
=item describe
Returns a human-readable string to describe this field. Mainly for debugging
purposes, the exact format is subject to change.
=back
=head1 COPYRIGHT
MIT.
=head1 AUTHOR
Yorhel <projects@yorhel.nl>
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)