Wsgi_15Watt.multipart

Parser for multipart/form-data

This module provides a parser for the multipart/form-data format. It can read from a file, a socket or a WSGI environment. The parser can be used to replace cgi.FieldStorage to work around its limitations.
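A quick usage sketch (assuming the module is importable as Wsgi_15Watt.multipart; the field names "name" and "upload" and the target directory are invented for illustration): a WSGI application hands its environ dict to parse_form_data() and gets text fields and file uploads back separately.

import os

from Wsgi_15Watt.multipart import parse_form_data


def app(environ, start_response):
    # forms holds decoded text fields, files holds MultipartPart instances
    forms, files = parse_form_data(environ)

    name = forms.get("name", "")   # plain text field -> str
    upload = files.get("upload")   # file upload -> MultipartPart or None

    if upload is not None:
        # save_as() copies the buffered or spooled part body to disk
        upload.save_as(os.path.join("/tmp", os.path.basename(upload.filename)))
        upload.close()

    start_response("200 OK", [("Content-Type", "text/plain")])
    return [("received: %s" % name).encode("utf-8")]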

# -*- coding: utf-8 -*-
"""
Parser for multipart/form-data
------------------------------

This module provides a parser for the multipart/form-data format. It can read
from a file, a socket or a WSGI environment. The parser can be used to replace
cgi.FieldStorage to work around its limitations.
"""


__author__ = "Marcel Hellkamp"
__version__ = "0.2.4"
__license__ = "MIT"
__all__ = ["MultipartError", "MultipartParser", "MultipartPart", "parse_form_data"]


import re
import sys
from io import BytesIO
from tempfile import TemporaryFile
from urllib.parse import parse_qs
from wsgiref.headers import Headers
from collections.abc import MutableMapping as DictMixin


##############################################################################
################################ Helper & Misc ###############################
##############################################################################
# Some of these were copied from bottle: https://bottlepy.org


# ---------
# MultiDict
# ---------

class MultiDict(DictMixin):
    """ A dict that remembers old values for each key.
        HTTP headers may repeat with differing values,
        such as Set-Cookie. We need to remember all
        values.
    """

    def __init__(self, *args, **kwargs):
        self.dict = dict()
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    def __len__(self):
        return len(self.dict)

    def __iter__(self):
        return iter(self.dict)

    def __contains__(self, key):
        return key in self.dict

    def __delitem__(self, key):
        del self.dict[key]

    def keys(self):
        return self.dict.keys()

    def __getitem__(self, key):
        return self.get(key, KeyError, -1)

    def __setitem__(self, key, value):
        self.append(key, value)

    def append(self, key, value):
        self.dict.setdefault(key, []).append(value)

    def replace(self, key, value):
        self.dict[key] = [value]

    def getall(self, key):
        return self.dict.get(key) or []

    def get(self, key, default=None, index=-1):
        if key not in self.dict and default != KeyError:
            return [default][index]

        return self.dict[key][index]

    def iterallitems(self):
        for key, values in self.dict.items():
            for value in values:
                yield key, value


def to_bytes(data, enc="utf8"):
    if isinstance(data, str):
        data = data.encode(enc)

    return data


def copy_file(stream, target, maxread=-1, buffer_size=2 ** 16):
    """ Read from :stream and write to :target until :maxread or EOF. """
    size, read = 0, stream.read

    while True:
        to_read = buffer_size if maxread < 0 else min(buffer_size, maxread - size)
        part = read(to_read)

        if not part:
            return size

        target.write(part)
        size += len(part)

# -------------
# Header Parser
# -------------


_special = re.escape('()<>@,;:"\\/[]?={} \t')
_re_special = re.compile(r'[%s]' % _special)
_quoted_string = r'"(?:\\.|[^"])*"'  # Quoted string
_value = r'(?:[^%s]+|%s)' % (_special, _quoted_string)  # Unquoted token or quoted string
_option = r'(?:;|^)\s*([^%s]+)\s*=\s*(%s)' % (_special, _value)
_re_option = re.compile(_option)  # key=value part of a Content-Type-like header


def header_quote(val):
    if not _re_special.search(val):
        return val

    return '"' + val.replace("\\", "\\\\").replace('"', '\\"') + '"'


def header_unquote(val, filename=False):
    if val[0] == val[-1] == '"':
        val = val[1:-1]

        if val[1:3] == ":\\" or val[:2] == "\\\\":
            val = val.split("\\")[-1]  # fix IE6 bug: full path --> filename

        return val.replace("\\\\", "\\").replace('\\"', '"')

    return val


def parse_options_header(header, options=None):
    if ";" not in header:
        return header.lower().strip(), {}

    content_type, tail = header.split(";", 1)
    options = options or {}

    for match in _re_option.finditer(tail):
        key = match.group(1).lower()
        value = header_unquote(match.group(2), key == "filename")
        options[key] = value

    return content_type, options

##############################################################################
################################## Multipart #################################
##############################################################################


class MultipartError(ValueError):
    pass


class MultipartParser(object):
    def __init__(
        self,
        stream,
        boundary,
        content_length=-1,
        disk_limit=2 ** 30,
        mem_limit=2 ** 20,
        memfile_limit=2 ** 18,
        buffer_size=2 ** 16,
        charset="latin1",
    ):
        """ Parse a multipart/form-data byte stream. This object is an iterator
            over the parts of the message.

            :param stream: A file-like stream. Must implement ``.read(size)``.
            :param boundary: The multipart boundary as a byte string.
            :param content_length: The maximum number of bytes to read.
        """
        self.stream = stream
        self.boundary = boundary
        self.content_length = content_length
        self.disk_limit = disk_limit
        self.memfile_limit = memfile_limit
        self.mem_limit = min(mem_limit, self.disk_limit)
        self.buffer_size = min(buffer_size, self.mem_limit)
        self.charset = charset

        if self.buffer_size - 6 < len(boundary):  # "--boundary--\r\n"
            raise MultipartError("Boundary does not fit into buffer_size.")

        self._done = []
        self._part_iter = None

    def __iter__(self):
        """ Iterate over the parts of the multipart message. """
        if not self._part_iter:
            self._part_iter = self._iterparse()

        for part in self._done:
            yield part

        for part in self._part_iter:
            self._done.append(part)
            yield part

    def parts(self):
        """ Returns a list with all parts of the multipart message. """
        return list(self)

    def get(self, name, default=None):
        """ Return the first part with that name or a default value (None). """
        for part in self:
            if name == part.name:
                return part

        return default

    def get_all(self, name):
        """ Return a list of parts with that name. """
        return [p for p in self if p.name == name]

    def _lineiter(self):
        """ Iterate over a binary file-like object line by line. Each line is
            returned as a (line, line_ending) tuple. If the line does not fit
            into self.buffer_size, line_ending is empty and the rest of the line
            is returned with the next iteration.
        """
        read = self.stream.read
        maxread, maxbuf = self.content_length, self.buffer_size
        buffer = b""  # buffer for the last (partial) line

        while True:
            data = read(maxbuf if maxread < 0 else min(maxbuf, maxread))
            maxread -= len(data)
            lines = (buffer + data).splitlines(True)
            len_first_line = len(lines[0])

            # be sure that the first line does not become too big
            if len_first_line > self.buffer_size:
                # at the same time don't split a '\r\n' accidentally
                if len_first_line == self.buffer_size + 1 and lines[0].endswith(b"\r\n"):
                    splitpos = self.buffer_size - 1
                else:
                    splitpos = self.buffer_size
                lines[:1] = [lines[0][:splitpos], lines[0][splitpos:]]

            if data:
                buffer = lines[-1]
                lines = lines[:-1]

            for line in lines:
                if line.endswith(b"\r\n"):
                    yield line[:-2], b"\r\n"
                elif line.endswith(b"\n"):
                    yield line[:-1], b"\n"
                elif line.endswith(b"\r"):
                    yield line[:-1], b"\r"
                else:
                    yield line, b""

            if not data:
                break

    def _iterparse(self):
        lines, line = self._lineiter(), ""
        separator = b"--" + to_bytes(self.boundary)
        terminator = b"--" + to_bytes(self.boundary) + b"--"

        # Consume first boundary. Ignore any preamble, as required by RFC
        # 2046, section 5.1.1.
        for line, nl in lines:
            if line in (separator, terminator):
                break
        else:
            raise MultipartError("Stream does not contain boundary")

        # Check for empty data
        if line == terminator:
            for _ in lines:
                raise MultipartError("Data after end of stream")
            return

        # For each part in stream...
        mem_used, disk_used = 0, 0  # Track used resources to prevent DoS
        is_tail = False  # True if the last line was incomplete (cut off)

        opts = {
            "buffer_size": self.buffer_size,
            "memfile_limit": self.memfile_limit,
            "charset": self.charset,
        }

        part = MultipartPart(**opts)

        for line, nl in lines:
            if line == terminator and not is_tail:
                part.file.seek(0)
                yield part
                break

            elif line == separator and not is_tail:
                if part.is_buffered():
                    mem_used += part.size
                else:
                    disk_used += part.size
                part.file.seek(0)

                yield part

                part = MultipartPart(**opts)

            else:
                is_tail = not nl  # The next line continues this one
                try:
                    part.feed(line, nl)

                    if part.is_buffered():
                        if part.size + mem_used > self.mem_limit:
                            raise MultipartError("Memory limit reached.")
                    elif part.size + disk_used > self.disk_limit:
                        raise MultipartError("Disk limit reached.")
                except MultipartError:
                    part.close()
                    raise
        else:
            # If we run off the end of the loop, the current MultipartPart
            # will not have been yielded, so it's our responsibility to
            # close it.
            part.close()

        if line != terminator:
            raise MultipartError("Unexpected end of multipart stream.")

class MultipartPart(object):
    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset="latin1"):
        self.headerlist = []
        self.headers = None
        self.file = False
        self.size = 0
        self._buf = b""
        self.disposition = None
        self.name = None
        self.filename = None
        self.content_type = None
        self.charset = charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=""):
        if self.file:
            return self.write_body(line, nl)

        return self.write_header(line, nl)

    def write_header(self, line, nl):
        line = line.decode(self.charset)

        if not nl:
            raise MultipartError("Unexpected end of line in header.")

        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in " \t" and self.headerlist:
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ":" not in line:
                raise MultipartError("Syntax error in header: No colon.")

            name, value = line.split(":", 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        if not line and not nl:
            return  # This does not even flush the buffer

        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl

        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError("Size of body exceeds Content-Length header.")

        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode="w+b"), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        content_disposition = self.headers.get("Content-Disposition", "")
        content_type = self.headers.get("Content-Type", "")

        if not content_disposition:
            raise MultipartError("Content-Disposition header is missing.")

        self.disposition, self.options = parse_options_header(content_disposition)
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")
        self.content_type, options = parse_options_header(content_type)
        self.charset = options.get("charset") or self.charset
        self.content_length = int(self.headers.get("Content-Length", "-1"))

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """

        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        pos = self.file.tell()
        self.file.seek(0)

        try:
            val = self.file.read()
        except IOError:
            raise
        finally:
            self.file.seek(pos)

        return val

    def save_as(self, path):
        with open(path, "wb") as fp:
            pos = self.file.tell()

            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)

        return size

    def close(self):
        if self.file:
            self.file.close()
            self.file = False

##############################################################################
#################################### WSGI ####################################
##############################################################################


def parse_form_data(environ, charset="utf8", strict=False, **kwargs):
    """ Parse form data from an environ dict and return a (forms, files) tuple.
        Both tuple values are dictionaries with the form-field name as a key
        (unicode) and lists as values (multiple values per key are possible).
        The forms-dictionary contains form-field values as unicode strings.
        The files-dictionary contains :class:`MultipartPart` instances, either
        because the form-field was a file-upload or the value is too big to fit
        into memory limits.

        :param environ: A WSGI environment dict.
        :param charset: The charset to use if unsure. (default: utf8)
        :param strict: If True, raise :exc:`MultipartError` on any parsing
                       errors. These are silently ignored by default.
    """

    forms, files = MultiDict(), MultiDict()

    try:
        if environ.get("REQUEST_METHOD", "GET").upper() not in ("POST", "PUT"):
            raise MultipartError("Request method other than POST or PUT.")
        content_length = int(environ.get("CONTENT_LENGTH", "-1"))
        content_type = environ.get("CONTENT_TYPE", "")

        if not content_type:
            raise MultipartError("Missing Content-Type header.")

        content_type, options = parse_options_header(content_type)
        stream = environ.get("wsgi.input") or BytesIO()
        kwargs["charset"] = charset = options.get("charset", charset)

        if content_type == "multipart/form-data":
            boundary = options.get("boundary", "")

            if not boundary:
                raise MultipartError("No boundary for multipart/form-data.")

            for part in MultipartParser(stream, boundary, content_length, **kwargs):
                if part.filename or not part.is_buffered():
                    files[part.name] = part
                else:  # TODO: Big form-fields are in the files dict. really?
                    forms[part.name] = part.value

        elif content_type in (
            "application/x-www-form-urlencoded",
            "application/x-url-encoded",
        ):
            mem_limit = kwargs.get("mem_limit", 2 ** 20)
            if content_length > mem_limit:
                raise MultipartError("Request too big. Increase mem_limit.")

            data = stream.read(mem_limit).decode(charset)

            if stream.read(1):  # There is more data than fits into mem_limit
                raise MultipartError("Request too big. Increase mem_limit.")

            data = parse_qs(data, keep_blank_values=True, encoding=charset)

            for key, values in data.items():
                for value in values:
                    forms[key] = value
        else:
            raise MultipartError("Unsupported content type.")

    except MultipartError:
        if strict:
            for part in files.values():
                part.close()
            raise

    return forms, files
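MultipartParser can also be driven directly, without a WSGI environ, given any readable byte stream and the boundary. A minimal self-contained sketch (the boundary and field content are invented; the parser accepts the boundary as str or bytes, since to_bytes() converts it):

from io import BytesIO

from Wsgi_15Watt.multipart import MultipartParser

boundary = "boundary123"
body = (
    b"--boundary123\r\n"
    b'Content-Disposition: form-data; name="field1"\r\n'
    b"\r\n"
    b"value1\r\n"
    b"--boundary123--\r\n"
)

for part in MultipartParser(BytesIO(body), boundary, content_length=len(body)):
    print(part.name, part.value)  # -> field1 value1
    part.close()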