Wsgi_15Watt.multipart
Parser for multipart/form-data
This module provides a parser for the multipart/form-data format. It can read from a file, a socket or a WSGI environment. The parser can be used to replace cgi.FieldStorage to work around its limitations.
# -*- coding: utf-8 -*-
"""
Parser for multipart/form-data
------------------------------

This module provides a parser for the multipart/form-data format. It can read
from a file, a socket or a WSGI environment. The parser can be used to replace
cgi.FieldStorage to work around its limitations.
"""


__author__ = "Marcel Hellkamp"
__version__ = "0.2.4"
__license__ = "MIT"
__all__ = ["MultipartError", "MultipartParser", "MultipartPart", "parse_form_data"]


import re
import sys
from io import BytesIO
from tempfile import TemporaryFile
from urllib.parse import parse_qs
from wsgiref.headers import Headers
from collections.abc import MutableMapping as DictMixin


##############################################################################
################################ Helper & Misc ###############################
##############################################################################
# Some of these were copied from bottle: https://bottlepy.org


# ---------
# MultiDict
# ---------


class MultiDict(DictMixin):
    """ A dict that remembers old values for each key.
        HTTP headers may repeat with differing values,
        such as Set-Cookie. We need to remember all
        values.

        Plain item access (``d[key]``) returns the most recently added
        value; ``getall`` returns every value stored for a key.
    """

    def __init__(self, *args, **kwargs):
        self.dict = dict()
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    def __len__(self):
        return len(self.dict)

    def __iter__(self):
        return iter(self.dict)

    def __contains__(self, key):
        return key in self.dict

    def __delitem__(self, key):
        del self.dict[key]

    def keys(self):
        return self.dict.keys()

    def __getitem__(self, key):
        # KeyError as the "default" is the sentinel that makes get() raise.
        return self.get(key, KeyError, -1)

    def __setitem__(self, key, value):
        # Assignment never overwrites: it appends to the key's value list.
        self.append(key, value)

    def append(self, key, value):
        """ Add ``value`` to the list of values stored for ``key``. """
        self.dict.setdefault(key, []).append(value)

    def replace(self, key, value):
        """ Discard all values of ``key`` and store only ``value``. """
        self.dict[key] = [value]

    def getall(self, key):
        """ Return the list of all values for ``key`` (empty if missing). """
        return self.dict.get(key) or []

    def get(self, key, default=None, index=-1):
        """ Return the value at ``index`` (default: the newest) for ``key``.

            If the key is missing, ``default`` is returned, unless
            ``default`` is the ``KeyError`` class, in which case a
            ``KeyError`` is raised.
        """
        if key not in self.dict and default is not KeyError:
            return [default][index]

        return self.dict[key][index]

    def iterallitems(self):
        """ Yield one ``(key, value)`` pair for every stored value. """
        for key, values in self.dict.items():
            for value in values:
                yield key, value


def to_bytes(data, enc="utf8"):
    """ Encode ``data`` with ``enc`` if it is a str; return it unchanged otherwise. """
    if isinstance(data, str):
        data = data.encode(enc)

    return data


def copy_file(stream, target, maxread=-1, buffer_size=2 ** 16):
    """ Read from :stream and write to :target until :maxread or EOF.

        Returns the number of bytes copied. A negative ``maxread`` means
        "no limit".
    """
    size, read = 0, stream.read

    while True:
        to_read = buffer_size if maxread < 0 else min(buffer_size, maxread - size)
        part = read(to_read)

        if not part:
            return size

        target.write(part)
        size += len(part)


# -------------
# Header Parser
# -------------


_special = re.escape('()<>@,;:"\\/[]?={} \t')
_re_special = re.compile(r'[%s]' % _special)
_quoted_string = r'"(?:\\.|[^"])*"'  # Quoted string
_value = r'(?:[^%s]+|%s)' % (_special, _quoted_string)  # Save or quoted string
_option = r'(?:;|^)\s*([^%s]+)\s*=\s*(%s)' % (_special, _value)
_re_option = re.compile(_option)  # key=value part of an Content-Type like header


def header_quote(val):
    """ Quote ``val`` for use as a header option value, if it needs quoting. """
    if not _re_special.search(val):
        return val

    return '"' + val.replace("\\", "\\\\").replace('"', '\\"') + '"'


def header_unquote(val, filename=False):
    """ Remove the quoting from a header option value.

        :param val: The raw option value, possibly a quoted string.
        :param filename: If True, a full Windows path (``C:\\...`` or
               ``\\\\host\\...``) is reduced to its last component. This is
               only applied to filenames so that other option values which
               happen to look like a path are left intact.
    """
    # The length check keeps '' and a lone '"' from being treated as quoted.
    if len(val) >= 2 and val[0] == val[-1] == '"':
        val = val[1:-1]

        if filename and (val[1:3] == ":\\" or val[:2] == "\\\\"):
            val = val.split("\\")[-1]  # fix ie6 bug: full path --> filename

        return val.replace("\\\\", "\\").replace('\\"', '"')

    return val


def parse_options_header(header, options=None):
    """ Split a Content-Type like header into ``(value, options)``.

        The main value is stripped and lower-cased in every case, because
        media types and disposition types are case-insensitive. Option keys
        are lower-cased, option values are unquoted.

        :param header: The header value, e.g. ``'text/plain; charset=utf8'``.
        :param options: Optional dict that receives the parsed options. It is
               only used if the header actually contains options.
    """
    content_type, sep, tail = header.partition(";")
    content_type = content_type.lower().strip()

    if not sep:
        return content_type, {}

    options = options or {}

    for match in _re_option.finditer(tail):
        key = match.group(1).lower()
        value = header_unquote(match.group(2), key == "filename")
        options[key] = value

    return content_type, options


##############################################################################
################################## Multipart #################################
##############################################################################


class MultipartError(ValueError):
    pass


class MultipartParser(object):
    def __init__(
        self,
        stream,
        boundary,
        content_length=-1,
        disk_limit=2 ** 30,
        mem_limit=2 ** 20,
        memfile_limit=2 ** 18,
        buffer_size=2 ** 16,
        charset="latin1",
    ):
        """ Parse a multipart/form-data byte stream. This object is an iterator
            over the parts of the message.

            :param stream: A file-like stream. Must implement ``.read(size)``.
            :param boundary: The multipart boundary as a byte string.
            :param content_length: The maximum number of bytes to read.
        """
        self.stream = stream
        self.boundary = boundary
        self.content_length = content_length
        self.disk_limit = disk_limit
        self.memfile_limit = memfile_limit
        self.mem_limit = min(mem_limit, self.disk_limit)
        self.buffer_size = min(buffer_size, self.mem_limit)
        self.charset = charset

        if self.buffer_size - 6 < len(boundary):  # "--boundary--\r\n"
            raise MultipartError("Boundary does not fit into buffer_size.")

        self._done = []
        self._part_iter = None

    def __iter__(self):
        """ Iterate over the parts of the multipart message. """
        if not self._part_iter:
            self._part_iter = self._iterparse()

        # Replay the parts that were already parsed, then continue parsing.
        for part in self._done:
            yield part

        for part in self._part_iter:
            self._done.append(part)
            yield part

    def parts(self):
        """ Returns a list with all parts of the multipart message. """
        return list(self)

    def get(self, name, default=None):
        """ Return the first part with that name or a default value (None). """
        for part in self:
            if name == part.name:
                return part

        return default

    def get_all(self, name):
        """ Return a list of parts with that name. """
        return [p for p in self if p.name == name]

    def _lineiter(self):
        """ Iterate over a binary file-like object line by line. Each line is
            returned as a (line, line_ending) tuple. If the line does not fit
            into cls.buffer_size, line_ending is empty and the rest of the line
            is returned with the next iteration.
        """
        read = self.stream.read
        maxread, maxbuf = self.content_length, self.buffer_size
        buffer = b""  # buffer for the last (partial) line

        while True:
            data = read(maxbuf if maxread < 0 else min(maxbuf, maxread))
            maxread -= len(data)
            lines = (buffer + data).splitlines(True)

            if not lines:
                # Nothing buffered and nothing read: the stream is empty (or
                # content_length is 0). Without this guard lines[0] below
                # would raise IndexError.
                break

            len_first_line = len(lines[0])

            # be sure that the first line does not become too big
            if len_first_line > self.buffer_size:
                # at the same time don't split a '\r\n' accidentally
                if len_first_line == self.buffer_size + 1 and lines[0].endswith(b"\r\n"):
                    splitpos = self.buffer_size - 1
                else:
                    splitpos = self.buffer_size
                lines[:1] = [lines[0][:splitpos], lines[0][splitpos:]]

            if data:
                # The last line may be incomplete; keep it for the next round.
                buffer = lines[-1]
                lines = lines[:-1]

            for line in lines:
                if line.endswith(b"\r\n"):
                    yield line[:-2], b"\r\n"
                elif line.endswith(b"\n"):
                    yield line[:-1], b"\n"
                elif line.endswith(b"\r"):
                    yield line[:-1], b"\r"
                else:
                    yield line, b""

            if not data:
                break

    def _iterparse(self):
        """ Generator that yields one :class:`MultipartPart` per body part.

            Raises :exc:`MultipartError` on malformed input or if the memory
            or disk limits are exceeded.
        """
        lines, line = self._lineiter(), ""
        separator = b"--" + to_bytes(self.boundary)
        terminator = b"--" + to_bytes(self.boundary) + b"--"

        # Consume first boundary. Ignore any preamble, as required by RFC
        # 2046, section 5.1.1.
        for line, nl in lines:
            if line in (separator, terminator):
                break
        else:
            raise MultipartError("Stream does not contain boundary")

        # Check for empty data
        if line == terminator:
            for _ in lines:
                raise MultipartError("Data after end of stream")
            return

        # For each part in stream...
        mem_used, disk_used = 0, 0  # Track used resources to prevent DoS
        is_tail = False  # True if the last line was incomplete (cutted)

        opts = {
            "buffer_size": self.buffer_size,
            "memfile_limit": self.memfile_limit,
            "charset": self.charset,
        }

        part = MultipartPart(**opts)

        for line, nl in lines:
            at_boundary = not is_tail and line in (separator, terminator)

            if at_boundary and not part.file:
                # The boundary arrived before the blank line that ends the
                # part header, so the part has no body file to hand out.
                part.close()
                raise MultipartError("Unexpected end of multipart header.")

            if at_boundary and line == terminator:
                part.file.seek(0)
                yield part
                break

            elif at_boundary:
                if part.is_buffered():
                    mem_used += part.size
                else:
                    disk_used += part.size
                part.file.seek(0)

                yield part

                part = MultipartPart(**opts)

            else:
                is_tail = not nl  # The next line continues this one
                try:
                    part.feed(line, nl)

                    if part.is_buffered():
                        if part.size + mem_used > self.mem_limit:
                            raise MultipartError("Memory limit reached.")
                    elif part.size + disk_used > self.disk_limit:
                        raise MultipartError("Disk limit reached.")
                except MultipartError:
                    part.close()
                    raise
        else:
            # If we run off the end of the loop, the current MultipartPart
            # will not have been yielded, so it's our responsibility to
            # close it.
            part.close()

        if line != terminator:
            raise MultipartError("Unexpected end of multipart stream.")


class MultipartPart(object):
    """ A single body part: its headers plus a file-like object with the data. """

    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset="latin1"):
        self.headerlist = []
        self.headers = None
        self.file = False  # False until the header is complete
        self.size = 0
        self._buf = b""  # line ending held back until the next body line
        self.disposition = None
        self.name = None
        self.filename = None
        self.content_type = None
        self.options = {}
        self.content_length = -1
        self.charset = charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=""):
        """ Consume one line: a header line until the header is complete,
            a body line afterwards.
        """
        if self.file:
            return self.write_body(line, nl)

        return self.write_header(line, nl)

    def write_header(self, line, nl):
        line = line.decode(self.charset)

        if not nl:
            raise MultipartError("Unexpected end of line in header.")

        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in " \t" and self.headerlist:
            # Folded header: append the continuation to the previous value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ":" not in line:
                raise MultipartError("Syntax error in header: No colon.")

            name, value = line.split(":", 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        if not line and not nl:
            return  # This does not even flush the buffer

        # The previous line ending is written only now, because the ending
        # right in front of a boundary belongs to the boundary, not the data.
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl

        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError("Size of body exceeds Content-Length header.")

        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode="w+b"), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)
            old.close()  # release the in-memory copy right away

    def finish_header(self):
        """ Evaluate the collected headers and prepare the body file.

            Raises :exc:`MultipartError` if Content-Disposition is missing or
            Content-Length is not an integer.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        content_disposition = self.headers.get("Content-Disposition", "")
        content_type = self.headers.get("Content-Type", "")

        if not content_disposition:
            raise MultipartError("Content-Disposition header is missing.")

        self.disposition, self.options = parse_options_header(content_disposition)
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")
        self.content_type, options = parse_options_header(content_type)
        self.charset = options.get("charset") or self.charset

        try:
            self.content_length = int(self.headers.get("Content-Length", "-1"))
        except ValueError as err:
            raise MultipartError("Invalid Content-Length header.") from err

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """

        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        pos = self.file.tell()
        self.file.seek(0)

        try:
            return self.file.read()
        finally:
            # Always restore the caller's file position.
            self.file.seek(pos)

    def save_as(self, path):
        """ Write the data to ``path`` and return the number of bytes written. """
        with open(path, "wb") as fp:
            pos = self.file.tell()

            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)

        return size

    def close(self):
        """ Close the body file, if there is one. """
        if self.file:
            self.file.close()
            self.file = False


##############################################################################
#################################### WSGI ####################################
##############################################################################


def parse_form_data(environ, charset="utf8", strict=False, **kwargs):
    """ Parse form data from an environ dict and return a (forms, files) tuple.
        Both tuple values are dictionaries with the form-field name as a key
        (unicode) and lists as values (multiple values per key are possible).
        The forms-dictionary contains form-field values as unicode strings.
        The files-dictionary contains :class:`MultipartPart` instances, either
        because the form-field was a file-upload or the value is too big to fit
        into memory limits.

        :param environ: An WSGI environment dict.
        :param charset: The charset to use if unsure. (default: utf8)
        :param strict: If True, raise :exc:`MultipartError` on any parsing
                       errors. These are silently ignored by default.
    """

    forms, files = MultiDict(), MultiDict()

    try:
        if environ.get("REQUEST_METHOD", "GET").upper() not in ("POST", "PUT"):
            raise MultipartError("Request method other than POST or PUT.")

        # CONTENT_LENGTH may be absent *or* empty in a WSGI environ.
        try:
            content_length = int(environ.get("CONTENT_LENGTH") or "-1")
        except ValueError as err:
            raise MultipartError("Invalid Content-Length header.") from err

        content_type = environ.get("CONTENT_TYPE", "")

        if not content_type:
            raise MultipartError("Missing Content-Type header.")

        content_type, options = parse_options_header(content_type)
        # "wsgi.input" is the standard key for the request body. The
        # "Wsgi_15Watt.input" key is still honoured as a fallback for callers
        # that relied on the previous lookup.
        stream = (
            environ.get("wsgi.input")
            or environ.get("Wsgi_15Watt.input")
            or BytesIO()
        )
        kwargs["charset"] = charset = options.get("charset", charset)

        if content_type == "multipart/form-data":
            boundary = options.get("boundary", "")

            if not boundary:
                raise MultipartError("No boundary for multipart/form-data.")

            for part in MultipartParser(stream, boundary, content_length, **kwargs):
                if part.filename or not part.is_buffered():
                    files[part.name] = part
                else:  # TODO: Big form-fields are in the files dict. really?
                    forms[part.name] = part.value

        elif content_type in (
            "application/x-www-form-urlencoded",
            "application/x-url-encoded",
        ):
            mem_limit = kwargs.get("mem_limit", 2 ** 20)
            if content_length > mem_limit:
                raise MultipartError("Request too big. Increase MAXMEM.")

            data = stream.read(mem_limit).decode(charset)

            if stream.read(1):  # There is more that does not fit mem_limit
                raise MultipartError("Request too big. Increase MAXMEM.")

            data = parse_qs(data, keep_blank_values=True, encoding=charset)

            for key, values in data.items():
                for value in values:
                    forms[key] = value
        else:
            raise MultipartError("Unsupported content type.")

    except MultipartError:
        if strict:
            # Close every collected part, not only the newest one per key.
            for _, part in files.iterallitems():
                part.close()
            raise

    return forms, files
Inappropriate argument value (of correct type).
class MultipartParser(object):
    def __init__(
        self,
        stream,
        boundary,
        content_length=-1,
        disk_limit=2 ** 30,
        mem_limit=2 ** 20,
        memfile_limit=2 ** 18,
        buffer_size=2 ** 16,
        charset="latin1",
    ):
        """ Parse a multipart/form-data byte stream. This object is an iterator
            over the parts of the message.

            :param stream: A file-like stream. Must implement ``.read(size)``.
            :param boundary: The multipart boundary as a byte string.
            :param content_length: The maximum number of bytes to read.
        """
        self.stream = stream
        self.boundary = boundary
        self.content_length = content_length
        self.disk_limit = disk_limit
        self.memfile_limit = memfile_limit
        self.mem_limit = min(mem_limit, self.disk_limit)
        self.buffer_size = min(buffer_size, self.mem_limit)
        self.charset = charset

        if self.buffer_size - 6 < len(boundary):  # "--boundary--\r\n"
            raise MultipartError("Boundary does not fit into buffer_size.")

        self._done = []
        self._part_iter = None

    def __iter__(self):
        """ Iterate over the parts of the multipart message. """
        if not self._part_iter:
            self._part_iter = self._iterparse()

        # Replay the parts that were already parsed, then continue parsing.
        for part in self._done:
            yield part

        for part in self._part_iter:
            self._done.append(part)
            yield part

    def parts(self):
        """ Returns a list with all parts of the multipart message. """
        return list(self)

    def get(self, name, default=None):
        """ Return the first part with that name or a default value (None). """
        for part in self:
            if name == part.name:
                return part

        return default

    def get_all(self, name):
        """ Return a list of parts with that name. """
        return [p for p in self if p.name == name]

    def _lineiter(self):
        """ Iterate over a binary file-like object line by line. Each line is
            returned as a (line, line_ending) tuple. If the line does not fit
            into cls.buffer_size, line_ending is empty and the rest of the line
            is returned with the next iteration.
        """
        read = self.stream.read
        maxread, maxbuf = self.content_length, self.buffer_size
        buffer = b""  # buffer for the last (partial) line

        while True:
            data = read(maxbuf if maxread < 0 else min(maxbuf, maxread))
            maxread -= len(data)
            lines = (buffer + data).splitlines(True)

            if not lines:
                # Nothing buffered and nothing read: the stream is empty (or
                # content_length is 0). Without this guard lines[0] below
                # would raise IndexError.
                break

            len_first_line = len(lines[0])

            # be sure that the first line does not become too big
            if len_first_line > self.buffer_size:
                # at the same time don't split a '\r\n' accidentally
                if len_first_line == self.buffer_size + 1 and lines[0].endswith(b"\r\n"):
                    splitpos = self.buffer_size - 1
                else:
                    splitpos = self.buffer_size
                lines[:1] = [lines[0][:splitpos], lines[0][splitpos:]]

            if data:
                # The last line may be incomplete; keep it for the next round.
                buffer = lines[-1]
                lines = lines[:-1]

            for line in lines:
                if line.endswith(b"\r\n"):
                    yield line[:-2], b"\r\n"
                elif line.endswith(b"\n"):
                    yield line[:-1], b"\n"
                elif line.endswith(b"\r"):
                    yield line[:-1], b"\r"
                else:
                    yield line, b""

            if not data:
                break

    def _iterparse(self):
        """ Generator that yields one :class:`MultipartPart` per body part.

            Raises :exc:`MultipartError` on malformed input or if the memory
            or disk limits are exceeded.
        """
        lines, line = self._lineiter(), ""
        separator = b"--" + to_bytes(self.boundary)
        terminator = b"--" + to_bytes(self.boundary) + b"--"

        # Consume first boundary. Ignore any preamble, as required by RFC
        # 2046, section 5.1.1.
        for line, nl in lines:
            if line in (separator, terminator):
                break
        else:
            raise MultipartError("Stream does not contain boundary")

        # Check for empty data
        if line == terminator:
            for _ in lines:
                raise MultipartError("Data after end of stream")
            return

        # For each part in stream...
        mem_used, disk_used = 0, 0  # Track used resources to prevent DoS
        is_tail = False  # True if the last line was incomplete (cutted)

        opts = {
            "buffer_size": self.buffer_size,
            "memfile_limit": self.memfile_limit,
            "charset": self.charset,
        }

        part = MultipartPart(**opts)

        for line, nl in lines:
            at_boundary = not is_tail and line in (separator, terminator)

            if at_boundary and not part.file:
                # The boundary arrived before the blank line that ends the
                # part header, so the part has no body file to hand out.
                part.close()
                raise MultipartError("Unexpected end of multipart header.")

            if at_boundary and line == terminator:
                part.file.seek(0)
                yield part
                break

            elif at_boundary:
                if part.is_buffered():
                    mem_used += part.size
                else:
                    disk_used += part.size
                part.file.seek(0)

                yield part

                part = MultipartPart(**opts)

            else:
                is_tail = not nl  # The next line continues this one
                try:
                    part.feed(line, nl)

                    if part.is_buffered():
                        if part.size + mem_used > self.mem_limit:
                            raise MultipartError("Memory limit reached.")
                    elif part.size + disk_used > self.disk_limit:
                        raise MultipartError("Disk limit reached.")
                except MultipartError:
                    part.close()
                    raise
        else:
            # If we run off the end of the loop, the current MultipartPart
            # will not have been yielded, so it's our responsibility to
            # close it.
            part.close()

        if line != terminator:
            raise MultipartError("Unexpected end of multipart stream.")
172 def __init__( 173 self, 174 stream, 175 boundary, 176 content_length=-1, 177 disk_limit=2 ** 30, 178 mem_limit=2 ** 20, 179 memfile_limit=2 ** 18, 180 buffer_size=2 ** 16, 181 charset="latin1", 182 ): 183 """ Parse a multipart/form-data byte stream. This object is an iterator 184 over the parts of the message. 185 186 :param stream: A file-like stream. Must implement ``.read(size)``. 187 :param boundary: The multipart boundary as a byte string. 188 :param content_length: The maximum number of bytes to read. 189 """ 190 self.stream = stream 191 self.boundary = boundary 192 self.content_length = content_length 193 self.disk_limit = disk_limit 194 self.memfile_limit = memfile_limit 195 self.mem_limit = min(mem_limit, self.disk_limit) 196 self.buffer_size = min(buffer_size, self.mem_limit) 197 self.charset = charset 198 199 if self.buffer_size - 6 < len(boundary): # "--boundary--\r\n" 200 raise MultipartError("Boundary does not fit into buffer_size.") 201 202 self._done = [] 203 self._part_iter = None
Parse a multipart/form-data byte stream. This object is an iterator over the parts of the message.
Parameters
- stream: A file-like stream. Must implement `.read(size)`.
- boundary: The multipart boundary as a byte string.
- content_length: The maximum number of bytes to read.
217 def parts(self): 218 """ Returns a list with all parts of the multipart message. """ 219 return list(self)
Returns a list with all parts of the multipart message.
221 def get(self, name, default=None): 222 """ Return the first part with that name or a default value (None). """ 223 for part in self: 224 if name == part.name: 225 return part 226 227 return default
Return the first part with that name or a default value (None).
class MultipartPart(object):
    """ A single body part: its headers plus a file-like object with the data. """

    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset="latin1"):
        self.headerlist = []
        self.headers = None
        self.file = False  # False until the header is complete
        self.size = 0
        self._buf = b""  # line ending held back until the next body line
        self.disposition = None
        self.name = None
        self.filename = None
        self.content_type = None
        # Set here as well, so that both attributes exist even before (or
        # without) a successful finish_header() call.
        self.options = {}
        self.content_length = -1
        self.charset = charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=""):
        """ Consume one line: a header line until the header is complete,
            a body line afterwards.
        """
        if self.file:
            return self.write_body(line, nl)

        return self.write_header(line, nl)

    def write_header(self, line, nl):
        line = line.decode(self.charset)

        if not nl:
            raise MultipartError("Unexpected end of line in header.")

        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in " \t" and self.headerlist:
            # Folded header: append the continuation to the previous value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ":" not in line:
                raise MultipartError("Syntax error in header: No colon.")

            name, value = line.split(":", 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        if not line and not nl:
            return  # This does not even flush the buffer

        # The previous line ending is written only now, because the ending
        # right in front of a boundary belongs to the boundary, not the data.
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl

        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError("Size of body exceeds Content-Length header.")

        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode="w+b"), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)
            old.close()  # release the in-memory copy right away

    def finish_header(self):
        """ Evaluate the collected headers and prepare the body file.

            Raises :exc:`MultipartError` if Content-Disposition is missing or
            Content-Length is not an integer.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        content_disposition = self.headers.get("Content-Disposition", "")
        content_type = self.headers.get("Content-Type", "")

        if not content_disposition:
            raise MultipartError("Content-Disposition header is missing.")

        self.disposition, self.options = parse_options_header(content_disposition)
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")
        self.content_type, options = parse_options_header(content_type)
        self.charset = options.get("charset") or self.charset

        try:
            self.content_length = int(self.headers.get("Content-Length", "-1"))
        except ValueError as err:
            raise MultipartError("Invalid Content-Length header.") from err

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """

        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        pos = self.file.tell()
        self.file.seek(0)

        try:
            return self.file.read()
        finally:
            # Always restore the caller's file position.
            self.file.seek(pos)

    def save_as(self, path):
        """ Write the data to ``path`` and return the number of bytes written. """
        with open(path, "wb") as fp:
            pos = self.file.tell()

            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)

        return size

    def close(self):
        """ Close the body file, if there is one. """
        if self.file:
            self.file.close()
            self.file = False
347 def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset="latin1"): 348 self.headerlist = [] 349 self.headers = None 350 self.file = False 351 self.size = 0 352 self._buf = b"" 353 self.disposition = None 354 self.name = None 355 self.filename = None 356 self.content_type = None 357 self.charset = charset 358 self.memfile_limit = memfile_limit 359 self.buffer_size = buffer_size
367 def write_header(self, line, nl): 368 line = line.decode(self.charset) 369 370 if not nl: 371 raise MultipartError("Unexpected end of line in header.") 372 373 if not line.strip(): # blank line -> end of header segment 374 self.finish_header() 375 elif line[0] in " \t" and self.headerlist: 376 name, value = self.headerlist.pop() 377 self.headerlist.append((name, value + line.strip())) 378 else: 379 if ":" not in line: 380 raise MultipartError("Syntax error in header: No colon.") 381 382 name, value = line.split(":", 1) 383 self.headerlist.append((name.strip(), value.strip()))
385 def write_body(self, line, nl): 386 if not line and not nl: 387 return # This does not even flush the buffer 388 389 self.size += len(line) + len(self._buf) 390 self.file.write(self._buf + line) 391 self._buf = nl 392 393 if self.content_length > 0 and self.size > self.content_length: 394 raise MultipartError("Size of body exceeds Content-Length header.") 395 396 if self.size > self.memfile_limit and isinstance(self.file, BytesIO): 397 # TODO: What about non-file uploads that exceed the memfile_limit? 398 self.file, old = TemporaryFile(mode="w+b"), self.file 399 old.seek(0) 400 copy_file(old, self.file, self.size, self.buffer_size)
def finish_header(self):
    """ Evaluate the collected header lines and prepare the body file.

        Sets ``headers``, ``disposition``, ``options``, ``name``,
        ``filename``, ``content_type``, ``charset`` and ``content_length``.

        Raises :exc:`MultipartError` if the Content-Disposition header is
        missing or the Content-Length header is not an integer.
    """
    self.file = BytesIO()
    self.headers = Headers(self.headerlist)
    content_disposition = self.headers.get("Content-Disposition", "")
    content_type = self.headers.get("Content-Type", "")

    if not content_disposition:
        raise MultipartError("Content-Disposition header is missing.")

    self.disposition, self.options = parse_options_header(content_disposition)
    self.name = self.options.get("name")
    self.filename = self.options.get("filename")
    self.content_type, options = parse_options_header(content_type)
    # A charset given in the part's own Content-Type wins over the default.
    self.charset = options.get("charset") or self.charset

    # The header comes from the client; report garbage as a parsing error
    # instead of letting a bare ValueError escape.
    try:
        self.content_length = int(self.headers.get("Content-Length", "-1"))
    except ValueError as err:
        raise MultipartError("Invalid Content-Length header.") from err
418 def is_buffered(self): 419 """ Return true if the data is fully buffered in memory.""" 420 return isinstance(self.file, BytesIO)
Return true if the data is fully buffered in memory.
422 @property 423 def value(self): 424 """ Data decoded with the specified charset """ 425 426 return self.raw.decode(self.charset)
Data decoded with the specified charset
428 @property 429 def raw(self): 430 """ Data without decoding """ 431 pos = self.file.tell() 432 self.file.seek(0) 433 434 try: 435 val = self.file.read() 436 except IOError: 437 raise 438 finally: 439 self.file.seek(pos) 440 441 return val
Data without decoding
def parse_form_data(environ, charset="utf8", strict=False, **kwargs):
    """ Parse form data from an environ dict and return a (forms, files) tuple.
        Both tuple values are dictionaries with the form-field name as a key
        (unicode) and lists as values (multiple values per key are possible).
        The forms-dictionary contains form-field values as unicode strings.
        The files-dictionary contains :class:`MultipartPart` instances, either
        because the form-field was a file-upload or the value is too big to fit
        into memory limits.

        :param environ: An WSGI environment dict.
        :param charset: The charset to use if unsure. (default: utf8)
        :param strict: If True, raise :exc:`MultipartError` on any parsing
                       errors. These are silently ignored by default.

        Additional keyword arguments are passed on to
        :class:`MultipartParser`; ``mem_limit`` also limits the size of an
        url-encoded body.
    """

    forms, files = MultiDict(), MultiDict()

    try:
        if environ.get("REQUEST_METHOD", "GET").upper() not in ("POST", "PUT"):
            raise MultipartError("Request method other than POST or PUT.")

        # CONTENT_LENGTH may be absent *or* empty in a WSGI environ. Anything
        # else that is not a number is reported as a parsing error instead of
        # escaping as a bare ValueError.
        try:
            content_length = int(environ.get("CONTENT_LENGTH") or "-1")
        except ValueError as err:
            raise MultipartError("Invalid Content-Length header.") from err

        content_type = environ.get("CONTENT_TYPE", "")

        if not content_type:
            raise MultipartError("Missing Content-Type header.")

        content_type, options = parse_options_header(content_type)
        # "wsgi.input" is the standard key for the request body. The
        # "Wsgi_15Watt.input" key is still honoured as a fallback for callers
        # that relied on the previous lookup.
        stream = (
            environ.get("wsgi.input")
            or environ.get("Wsgi_15Watt.input")
            or BytesIO()
        )
        kwargs["charset"] = charset = options.get("charset", charset)

        if content_type == "multipart/form-data":
            boundary = options.get("boundary", "")

            if not boundary:
                raise MultipartError("No boundary for multipart/form-data.")

            for part in MultipartParser(stream, boundary, content_length, **kwargs):
                if part.filename or not part.is_buffered():
                    files[part.name] = part
                else:  # TODO: Big form-fields are in the files dict. really?
                    forms[part.name] = part.value

        elif content_type in (
            "application/x-www-form-urlencoded",
            "application/x-url-encoded",
        ):
            mem_limit = kwargs.get("mem_limit", 2 ** 20)
            if content_length > mem_limit:
                raise MultipartError("Request too big. Increase MAXMEM.")

            data = stream.read(mem_limit).decode(charset)

            if stream.read(1):  # There is more that does not fit mem_limit
                raise MultipartError("Request too big. Increase MAXMEM.")

            data = parse_qs(data, keep_blank_values=True, encoding=charset)

            for key, values in data.items():
                for value in values:
                    forms[key] = value
        else:
            raise MultipartError("Unsupported content type.")

    except MultipartError:
        if strict:
            # Close every collected part. files.values() would only visit the
            # newest part of each key.
            for _, part in files.iterallitems():
                part.close()
            raise

    return forms, files
Parse form data from an environ dict and return a (forms, files) tuple.
Both tuple values are dictionaries with the form-field name as a key
(unicode) and lists as values (multiple values per key are possible).
The forms-dictionary contains form-field values as unicode strings.
The files-dictionary contains MultipartPart instances, either
because the form-field was a file-upload or the value is too big to fit
into memory limits.
Parameters
- environ: An WSGI environment dict.
- charset: The charset to use if unsure. (default: utf8)
- strict: If True, raise MultipartError on any parsing errors. These are silently ignored by default.