| Home | Trees | Indices | Help |
|
|---|
|
|
1
2 import gzip
3 import sys
4
5 import cherrypy
6 from cherrypy._cpcompat import BytesIO, IncompleteRead, ntob, ntou
7
8 europoundUnicode = ntou('\x80\xa3')
9 sing = ntou("\u6bdb\u6cfd\u4e1c: Sing, Little Birdie?", 'escape')
10 sing8 = sing.encode('utf-8')
11 sing16 = sing.encode('utf-16')
12
13
14 from cherrypy.test import helper
15
16
18
class Root:
    """Root handlers used to exercise request decoding and response encoding."""

    def index(self, param):
        """Assert ``param`` equals ``europoundUnicode`` and yield that text."""
        expected = europoundUnicode
        assert param == expected, "%r != %r" % (param, expected)
        yield expected
    index.exposed = True

    def mao_zedong(self):
        """Return the module-level unicode sample ``sing``."""
        return sing
    mao_zedong.exposed = True

    def utf8(self):
        """Return ``sing8``; the encode tool is configured for 'utf-8' only."""
        return sing8
    utf8.exposed = True
    utf8._cp_config = {'tools.encode.encoding': 'utf-8'}

    def cookies_and_headers(self):
        """Set a cookie and a non-ascii header, then return a fixed body."""
        # if the headers have non-ascii characters and a cookie has
        # any part which is unicode (even ascii), the response
        # should not fail.
        response = cherrypy.response
        response.cookie['candy'] = 'bar'
        response.cookie['candy']['domain'] = 'cherrypy.org'
        response.headers['Some-Header'] = 'My d\xc3\xb6g has fleas'
        return 'Any content'
    cookies_and_headers.exposed = True

    def reqparams(self, *args, **kwargs):
        """Return the sorted request params as utf8 'name: value' pairs.

        The encoded pairs are joined with ``ntob(', ')``.
        """
        ordered = sorted(cherrypy.request.params.items())
        encoded = [": ".join((name, value)).encode('utf8')
                   for name, value in ordered]
        return ntob(', ').join(encoded)
    reqparams.exposed = True

    def nontext(self, *args, **kwargs):
        """Return four control characters labelled 'application/binary'."""
        cherrypy.response.headers['Content-Type'] = 'application/binary'
        return '\x00\x01\x02\x03'
    nontext.exposed = True
    # Let the encode tool handle this non-text type and add a charset to it.
    nontext._cp_config = {
        'tools.encode.text_only': False,
        'tools.encode.add_charset': True,
    }
63
class GZIP:
    """Handlers for the gzip tests: one plain body and two failing generators."""

    def index(self):
        """Yield the single chunk "Hello, world"."""
        for chunk in ("Hello, world",):
            yield chunk
    index.exposed = True

    def noshow(self):
        """Raise IndexError as soon as the response body is iterated."""
        # Test for ticket #147, where yield showed no exceptions
        # (content-encoding was still gzip even though traceback
        # wasn't zipped).
        raise IndexError()
        # Never reached; its presence makes this handler a generator.
        yield "Here be dragons"
    noshow.exposed = True
    # Turn encoding off so the gzip tool is the one doing the collapse.
    noshow._cp_config = {'tools.encode.on': False}

    def noshow_stream(self):
        """Same failure as ``noshow``, but with response streaming enabled."""
        # Test for ticket #147, where yield showed no exceptions
        # (content-encoding was still gzip even though traceback
        # wasn't zipped).
        raise IndexError()
        # Never reached; its presence makes this handler a generator.
        yield "Here be dragons"
    noshow_stream.exposed = True
    noshow_stream._cp_config = {'response.stream': True}
88
class Decode:
    """Handlers that echo request params under two decode-tool configurations."""

    def extra_charset(self, *args, **kwargs):
        """Return the request params as 'name: value' pairs joined by ', '."""
        params = cherrypy.request.params
        return ', '.join([": ".join((name, value))
                          for name, value in params.items()])
    extra_charset.exposed = True
    # utf-16 is supplied through the decode tool's 'default_encoding' setting.
    extra_charset._cp_config = {
        'tools.decode.on': True,
        'tools.decode.default_encoding': ['utf-16'],
    }

    def force_charset(self, *args, **kwargs):
        """Return the request params as 'name: value' pairs joined by ', '."""
        params = cherrypy.request.params
        return ', '.join([": ".join((name, value))
                          for name, value in params.items()])
    force_charset.exposed = True
    # utf-16 is supplied through the decode tool's 'encoding' setting.
    force_charset._cp_config = {
        'tools.decode.on': True,
        'tools.decode.encoding': 'utf-16',
    }
108
109 root = Root()
110 root.gzip = GZIP()
111 root.decode = Decode()
112 cherrypy.tree.mount(root, config={'/gzip': {'tools.gzip.on': True}})
113 setup_server = staticmethod(setup_server)
114
116 europoundUtf8 = europoundUnicode.encode('utf-8')
117 self.getPage(ntob('/?param=') + europoundUtf8)
118 self.assertBody(europoundUtf8)
119
120 # Encoded utf8 query strings MUST be parsed correctly.
121 # Here, q is the POUND SIGN U+00A3 encoded in utf8 and then %HEX
122 self.getPage("/reqparams?q=%C2%A3")
123 # The return value will be encoded as utf8.
124 self.assertBody(ntob("q: \xc2\xa3"))
125
126 # Query strings that are incorrectly encoded MUST raise 404.
127 # Here, q is the POUND SIGN U+00A3 encoded in latin1 and then %HEX
128 self.getPage("/reqparams?q=%A3")
129 self.assertStatus(404)
130 self.assertErrorPage(
131 404,
132 "The given query string could not be processed. Query "
133 "strings for this resource must be encoded with 'utf8'.")
134
# POST application/x-www-form-urlencoded entities in several charsets and
# check what the server hands back (or which error page it produces).

# Test the decoding of an application/x-www-form-urlencoded entity.
europoundUtf8 = europoundUnicode.encode('utf-8')
body = ntob("param=") + europoundUtf8
self.getPage(
    '/',
    method='POST',
    headers=[
        ("Content-Type", "application/x-www-form-urlencoded"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertBody(europoundUtf8)

# Encoded utf8 entities MUST be parsed and decoded correctly.
# Here, q is the POUND SIGN U+00A3 encoded in utf8
body = ntob("q=\xc2\xa3")
self.getPage(
    '/reqparams',
    method='POST',
    headers=[
        ("Content-Type", "application/x-www-form-urlencoded"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertBody(ntob("q: \xc2\xa3"))

# ...and in utf16, which is not in the default attempt_charsets list:
body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
self.getPage(
    '/reqparams',
    method='POST',
    headers=[
        ("Content-Type",
         "application/x-www-form-urlencoded;charset=utf-16"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertBody(ntob("q: \xc2\xa3"))

# Entities that are incorrectly encoded MUST raise 400.
# Here, q is the POUND SIGN U+00A3 encoded in utf16, but
# the Content-Type incorrectly labels it utf-8.
body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
self.getPage(
    '/reqparams',
    method='POST',
    headers=[
        ("Content-Type",
         "application/x-www-form-urlencoded;charset=utf-8"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertStatus(400)
self.assertErrorPage(
    400,
    "The request entity could not be decoded. The following charsets "
    "were attempted: ['utf-8']")
188
# Exercise the /decode handlers: an extra default charset versus a forced
# charset, each fed utf-16 or utf-8 form bodies.

# An extra charset should be tried first, and succeed if it matches.
# Here, we add utf-16 as a charset and pass a utf-16 body.
body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
self.getPage(
    '/decode/extra_charset',
    method='POST',
    headers=[
        ("Content-Type", "application/x-www-form-urlencoded"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertBody(ntob("q: \xc2\xa3"))

# An extra charset should be tried first, and continue to other default
# charsets if it doesn't match.
# Here, we add utf-16 as a charset but still pass a utf-8 body.
body = ntob("q=\xc2\xa3")
self.getPage(
    '/decode/extra_charset',
    method='POST',
    headers=[
        ("Content-Type", "application/x-www-form-urlencoded"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertBody(ntob("q: \xc2\xa3"))

# An extra charset should error if force is True and it doesn't match.
# Here, we force utf-16 as a charset but still pass a utf-8 body.
body = ntob("q=\xc2\xa3")
self.getPage(
    '/decode/force_charset',
    method='POST',
    headers=[
        ("Content-Type", "application/x-www-form-urlencoded"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertErrorPage(
    400,
    "The request entity could not be decoded. The following charsets "
    "were attempted: ['utf-16']")
226
# Test the decoding of a multipart entity when the charset (utf16) is
# explicitly given.
body = ntob('\r\n'.join([
    '--X',
    'Content-Type: text/plain;charset=utf-16',
    'Content-Disposition: form-data; name="text"',
    '',
    '\xff\xfea\x00b\x00\x1c c\x00',
    '--X',
    'Content-Type: text/plain;charset=utf-16',
    'Content-Disposition: form-data; name="submit"',
    '',
    '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
    '--X--'
]))
self.getPage(
    '/reqparams',
    method='POST',
    headers=[
        ("Content-Type", "multipart/form-data;boundary=X"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
# The handler's reply is utf8-encoded, so the expected body is utf8 bytes.
self.assertBody(ntob("submit: Create, text: ab\xe2\x80\x9cc"))
250
# Test the decoding of a multipart entity when the charset (utf8) is
# NOT explicitly given, but is in the list of charsets to attempt.
body = ntob('\r\n'.join([
    '--X',
    'Content-Disposition: form-data; name="text"',
    '',
    '\xe2\x80\x9c',
    '--X',
    'Content-Disposition: form-data; name="submit"',
    '',
    'Create',
    '--X--'
]))
self.getPage(
    '/reqparams',
    method='POST',
    headers=[
        ("Content-Type", "multipart/form-data;boundary=X"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertBody(ntob("submit: Create, text: \xe2\x80\x9c"))
272
# Test the decoding of a multipart entity when the charset (utf16) is
# NOT explicitly given, and is NOT in the list of charsets to attempt.
body = ntob('\r\n'.join([
    '--X',
    'Content-Disposition: form-data; name="text"',
    '',
    '\xff\xfea\x00b\x00\x1c c\x00',
    '--X',
    'Content-Disposition: form-data; name="submit"',
    '',
    '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
    '--X--'
]))
self.getPage(
    '/reqparams',
    method='POST',
    headers=[
        ("Content-Type", "multipart/form-data;boundary=X"),
        ("Content-Length", str(len(body))),
    ],
    body=body)
self.assertStatus(400)
self.assertErrorPage(
    400,
    "The request entity could not be decoded. The following charsets "
    "were attempted: ['us-ascii', 'utf-8']")
298
300 self.getPage('/nontext')
301 self.assertHeader('Content-Type', 'application/binary;charset=utf-8')
302 self.assertBody('\x00\x01\x02\x03')
303
305 # Default encoding should be utf-8
306 self.getPage('/mao_zedong')
307 self.assertBody(sing8)
308
309 # Ask for utf-16.
310 self.getPage('/mao_zedong', [('Accept-Charset', 'utf-16')])
311 self.assertHeader('Content-Type', 'text/html;charset=utf-16')
312 self.assertBody(sing16)
313
314 # Ask for multiple encodings. ISO-8859-1 should fail, and utf-16
315 # should be produced.
316 self.getPage('/mao_zedong', [('Accept-Charset',
317 'iso-8859-1;q=1, utf-16;q=0.5')])
318 self.assertBody(sing16)
319
320 # The "*" value should default to our default_encoding, utf-8
321 self.getPage('/mao_zedong', [('Accept-Charset', '*;q=1, utf-7;q=.2')])
322 self.assertBody(sing8)
323
324 # Only allow iso-8859-1, which should fail and raise 406.
325 self.getPage('/mao_zedong', [('Accept-Charset', 'iso-8859-1, *;q=0')])
326 self.assertStatus("406 Not Acceptable")
327 self.assertInBody("Your client sent this Accept-Charset header: "
328 "iso-8859-1, *;q=0. We tried these charsets: "
329 "iso-8859-1.")
330
331 # Ask for x-mac-ce, which should be unknown. See ticket #569.
332 self.getPage('/mao_zedong', [('Accept-Charset',
333 'us-ascii, ISO-8859-1, x-mac-ce')])
334 self.assertStatus("406 Not Acceptable")
335 self.assertInBody("Your client sent this Accept-Charset header: "
336 "us-ascii, ISO-8859-1, x-mac-ce. We tried these "
337 "charsets: ISO-8859-1, us-ascii, x-mac-ce.")
338
339 # Test the 'encoding' arg to encode.
340 self.getPage('/utf8')
341 self.assertBody(sing8)
342 self.getPage('/utf8', [('Accept-Charset', 'us-ascii, ISO-8859-1')])
343 self.assertStatus("406 Not Acceptable")
344
346 zbuf = BytesIO()
347 zfile = gzip.GzipFile(mode='wb', fileobj=zbuf, compresslevel=9)
348 zfile.write(ntob("Hello, world"))
349 zfile.close()
350
351 self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip")])
352 self.assertInBody(zbuf.getvalue()[:3])
353 self.assertHeader("Vary", "Accept-Encoding")
354 self.assertHeader("Content-Encoding", "gzip")
355
356 # Test when gzip is denied.
357 self.getPage('/gzip/', headers=[("Accept-Encoding", "identity")])
358 self.assertHeader("Vary", "Accept-Encoding")
359 self.assertNoHeader("Content-Encoding")
360 self.assertBody("Hello, world")
361
362 self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip;q=0")])
363 self.assertHeader("Vary", "Accept-Encoding")
364 self.assertNoHeader("Content-Encoding")
365 self.assertBody("Hello, world")
366
367 self.getPage('/gzip/', headers=[("Accept-Encoding", "*;q=0")])
368 self.assertStatus(406)
369 self.assertNoHeader("Content-Encoding")
370 self.assertErrorPage(406, "identity, gzip")
371
372 # Test for ticket #147
373 self.getPage('/gzip/noshow', headers=[("Accept-Encoding", "gzip")])
374 self.assertNoHeader('Content-Encoding')
375 self.assertStatus(500)
376 self.assertErrorPage(500, pattern="IndexError\n")
377
378 # In this case, there's nothing we can do to deliver a
379 # readable page, since 1) the gzip header is already set,
380 # and 2) we may have already written some of the body.
381 # The fix is to never stream yields when using gzip.
382 if (cherrypy.server.protocol_version == "HTTP/1.0" or
383 getattr(cherrypy.server, "using_apache", False)):
384 self.getPage('/gzip/noshow_stream',
385 headers=[("Accept-Encoding", "gzip")])
386 self.assertHeader('Content-Encoding', 'gzip')
387 self.assertInBody('\x1f\x8b\x08\x00')
388 else:
389 # The wsgiserver will simply stop sending data, and the HTTP client
390 # will error due to an incomplete chunk-encoded stream.
391 self.assertRaises((ValueError, IncompleteRead), self.getPage,
392 '/gzip/noshow_stream',
393 headers=[("Accept-Encoding", "gzip")])
394
398
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Dec 2 09:59:41 2014 | http://epydoc.sourceforge.net |