URL: http://github.com/python/cpython/pull/113648.diff
self): self.assertEqual(co1.co_filename, "f1") self.assertEqual(co2.co_filename, "f2") + def test_no_allow_code(self): + data = {'a': [({0},)]} + dump = marshal.dumps(data, allow_code=False) + self.assertEqual(marshal.loads(dump, allow_code=False), data) + + f = io.BytesIO() + marshal.dump(data, f, allow_code=False) + f.seek(0) + self.assertEqual(marshal.load(f, allow_code=False), data) + + co = ExceptionTestCase.test_exceptions.__code__ + data = {'a': [({co, 0},)]} + dump = marshal.dumps(data, allow_code=True) + self.assertEqual(marshal.loads(dump, allow_code=True), data) + with self.assertRaises(ValueError): + marshal.dumps(data, allow_code=False) + with self.assertRaises(ValueError): + marshal.loads(dump, allow_code=False) + + marshal.dump(data, io.BytesIO(), allow_code=True) + self.assertEqual(marshal.load(io.BytesIO(dump), allow_code=True), data) + with self.assertRaises(ValueError): + marshal.dump(data, io.BytesIO(), allow_code=False) + with self.assertRaises(ValueError): + marshal.load(io.BytesIO(dump), allow_code=False) + @requires_debug_ranges() def test_minimal_linetable_with_no_debug_ranges(self): # Make sure when demarshalling objects with `-X no_debug_ranges` diff --git a/Misc/NEWS.d/next/Library/2024-01-02-12-41-59.gh-issue-113626.i1PPY_.rst b/Misc/NEWS.d/next/Library/2024-01-02-12-41-59.gh-issue-113626.i1PPY_.rst new file mode 100644 index 00000000000000..5c37dad5808ce3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-02-12-41-59.gh-issue-113626.i1PPY_.rst @@ -0,0 +1,3 @@ +Add support for the *allow_code* argument in the :mod:`marshal` module. +Passing ``allow_code=False`` prevents serialization and de-serialization of +code objects which is incompatible between Python versions. diff --git a/Python/clinic/marshal.c.h b/Python/clinic/marshal.c.h index e6b0f1999a41c5..c19a3ed5050ed3 100644 --- a/Python/clinic/marshal.c.h +++ b/Python/clinic/marshal.c.h @@ -2,10 +2,14 @@ preserve [clinic start generated code]*/ -#include "pycore_modsupport.h" // _PyArg_CheckPositional() +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(marshal_dump__doc__, -"dump($module, value, file, version=version, /)\n" +"dump($module, value, file, version=version, /, *, allow_code=True)\n" "--\n" "\n" "Write the value on the open file.\n" @@ -16,53 +20,95 @@ PyDoc_STRVAR(marshal_dump__doc__, " Must be a writeable binary file.\n" " version\n" " Indicates the data format that dump should use.\n" +" allow_code\n" +" Allow to write code objects.\n" "\n" "If the value has (or contains an object that has) an unsupported type, a\n" "ValueError exception is raised - but garbage data will also be written\n" "to the file. The object will not be properly read back by load()."); #define MARSHAL_DUMP_METHODDEF \ - {"dump", _PyCFunction_CAST(marshal_dump), METH_FASTCALL, marshal_dump__doc__}, + {"dump", _PyCFunction_CAST(marshal_dump), METH_FASTCALL|METH_KEYWORDS, marshal_dump__doc__}, static PyObject * marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file, - int version); + int version, int allow_code); static PyObject * -marshal_dump(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +marshal_dump(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(allow_code), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "", "", "allow_code", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "dump", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2; PyObject *value; PyObject *file; int version = Py_MARSHAL_VERSION; + int allow_code = 1; - if (!_PyArg_CheckPositional("dump", nargs, 2, 3)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 3, 0, argsbuf); + if (!args) { goto exit; } value = args[0]; file = args[1]; if (nargs < 3) { - goto skip_optional; + goto skip_optional_posonly; } + noptargs--; version = PyLong_AsInt(args[2]); if (version == -1 && PyErr_Occurred()) { goto exit; } -skip_optional: - return_value = marshal_dump_impl(module, value, file, version); +skip_optional_posonly: + if (!noptargs) { + goto skip_optional_kwonly; + } + allow_code = PyObject_IsTrue(args[3]); + if (allow_code < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = marshal_dump_impl(module, value, file, version, allow_code); exit: return return_value; } PyDoc_STRVAR(marshal_load__doc__, -"load($module, file, /)\n" +"load($module, file, /, *, allow_code=True)\n" "--\n" "\n" "Read one value from the open file and return it.\n" "\n" " file\n" " Must be readable binary file.\n" +" allow_code\n" +" Allow to load code objects.\n" "\n" "If no valid value is read (e.g. because the data has a different Python\n" "version\'s incompatible marshal format), raise EOFError, ValueError or\n" @@ -72,10 +118,66 @@ PyDoc_STRVAR(marshal_load__doc__, "dump(), load() will substitute None for the unmarshallable type."); #define MARSHAL_LOAD_METHODDEF \ - {"load", (PyCFunction)marshal_load, METH_O, marshal_load__doc__}, + {"load", _PyCFunction_CAST(marshal_load), METH_FASTCALL|METH_KEYWORDS, marshal_load__doc__}, + +static PyObject * +marshal_load_impl(PyObject *module, PyObject *file, int allow_code); + +static PyObject * +marshal_load(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(allow_code), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "allow_code", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "load", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + PyObject *file; + int allow_code = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + file = args[0]; + if (!noptargs) { + goto skip_optional_kwonly; + } + allow_code = PyObject_IsTrue(args[1]); + if (allow_code < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = marshal_load_impl(module, file, allow_code); + +exit: + return return_value; +} PyDoc_STRVAR(marshal_dumps__doc__, -"dumps($module, value, version=version, /)\n" +"dumps($module, value, version=version, /, *, allow_code=True)\n" "--\n" "\n" "Return the bytes object that would be written to a file by dump(value, file).\n" @@ -84,66 +186,150 @@ PyDoc_STRVAR(marshal_dumps__doc__, " Must be a supported type.\n" " version\n" " Indicates the data format that dumps should use.\n" +" allow_code\n" +" Allow to write code objects.\n" "\n" "Raise a ValueError exception if value has (or contains an object that has) an\n" "unsupported type."); #define MARSHAL_DUMPS_METHODDEF \ - {"dumps", _PyCFunction_CAST(marshal_dumps), METH_FASTCALL, marshal_dumps__doc__}, + {"dumps", _PyCFunction_CAST(marshal_dumps), METH_FASTCALL|METH_KEYWORDS, marshal_dumps__doc__}, static PyObject * -marshal_dumps_impl(PyObject *module, PyObject *value, int version); +marshal_dumps_impl(PyObject *module, PyObject *value, int version, + int allow_code); static PyObject * -marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(allow_code), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "", "allow_code", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "dumps", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *value; int version = Py_MARSHAL_VERSION; + int allow_code = 1; - if (!_PyArg_CheckPositional("dumps", nargs, 1, 2)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { goto exit; } value = args[0]; if (nargs < 2) { - goto skip_optional; + goto skip_optional_posonly; } + noptargs--; version = PyLong_AsInt(args[1]); if (version == -1 && PyErr_Occurred()) { goto exit; } -skip_optional: - return_value = marshal_dumps_impl(module, value, version); +skip_optional_posonly: + if (!noptargs) { + goto skip_optional_kwonly; + } + allow_code = PyObject_IsTrue(args[2]); + if (allow_code < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = marshal_dumps_impl(module, value, version, allow_code); exit: return return_value; } PyDoc_STRVAR(marshal_loads__doc__, -"loads($module, bytes, /)\n" +"loads($module, bytes, /, *, allow_code=True)\n" "--\n" "\n" "Convert the bytes-like object to a value.\n" "\n" +" allow_code\n" +" Allow to load code objects.\n" +"\n" "If no valid value is found, raise EOFError, ValueError or TypeError. Extra\n" "bytes in the input are ignored."); #define MARSHAL_LOADS_METHODDEF \ - {"loads", (PyCFunction)marshal_loads, METH_O, marshal_loads__doc__}, + {"loads", _PyCFunction_CAST(marshal_loads), METH_FASTCALL|METH_KEYWORDS, marshal_loads__doc__}, static PyObject * -marshal_loads_impl(PyObject *module, Py_buffer *bytes); +marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code); static PyObject * -marshal_loads(PyObject *module, PyObject *arg) +marshal_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(allow_code), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "allow_code", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "loads", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer bytes = {NULL, NULL}; + int allow_code = 1; - if (PyObject_GetBuffer(arg, &bytes, PyBUF_SIMPLE) != 0) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &bytes, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + allow_code = PyObject_IsTrue(args[1]); + if (allow_code < 0) { goto exit; } - return_value = marshal_loads_impl(module, &bytes); +skip_optional_kwonly: + return_value = marshal_loads_impl(module, &bytes, allow_code); exit: /* Cleanup for bytes */ @@ -153,4 +339,4 @@ marshal_loads(PyObject *module, PyObject *arg) return return_value; } -/*[clinic end generated code: output=92d2d47aac9128ee input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1575b9a3ae48ad3d input=a9049054013a1b77]*/ diff --git a/Python/marshal.c b/Python/marshal.c index 8940582c7f5328..daec7415b3fc7e 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -78,6 +78,7 @@ module marshal #define WFERR_UNMARSHALLABLE 1 #define WFERR_NESTEDTOODEEP 2 #define WFERR_NOMEMORY 3 +#define WFERR_CODE_NOT_ALLOWED 4 typedef struct { FILE *fp; @@ -89,6 +90,7 @@ typedef struct { char *buf; _Py_hashtable_t *hashtable; int version; + int allow_code; } WFILE; #define w_byte(c, p) do { \ @@ -225,6 +227,9 @@ w_short_pstring(const void *s, Py_ssize_t n, WFILE *p) w_byte((t) | flag, (p)); \ } while(0) +static PyObject * +_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code); + static void w_PyLong(const PyLongObject *ob, char flag, WFILE *p) { @@ -520,7 +525,8 @@ w_complex_object(PyObject *v, char flag, WFILE *p) } Py_ssize_t i = 0; while (_PySet_NextEntry(v, &pos, &value, &hash)) { - PyObject *dump = PyMarshal_WriteObjectToString(value, p->version); + PyObject *dump = _PyMarshal_WriteObjectToString(value, + p->version, p->allow_code); if (dump == NULL) { p->error = WFERR_UNMARSHALLABLE; Py_DECREF(pairs); @@ -549,6 +555,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) Py_DECREF(pairs); } else if (PyCode_Check(v)) { + if (!p->allow_code) { + p->error = WFERR_CODE_NOT_ALLOWED; + return; + } PyCodeObject *co = (PyCodeObject *)v; PyObject *co_code = _PyCode_GetCode(co); if (co_code == NULL) { @@ -657,6 +667,7 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) wf.end = wf.ptr + sizeof(buf); wf.error = WFERR_OK; wf.version = version; + wf.allow_code = 1; if (w_init_refs(&wf, version)) { return; /* caller must check PyErr_Occurred() */ } @@ -674,6 +685,7 @@ typedef struct { char *buf; Py_ssize_t buf_size; PyObject *refs; /* a list */ + int allow_code; } RFILE; static const char * @@ -1364,6 +1376,11 @@ r_object(RFILE *p) PyObject* linetable = NULL; PyObject *exceptiontable = NULL; + if (!p->allow_code) { + PyErr_SetString(PyExc_ValueError, + "unmarshalling code objects is disallowed"); + break; + } idx = r_ref_reserve(flag, p); if (idx < 0) break; @@ -1609,6 +1626,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp) { RFILE rf; PyObject *result; + rf.allow_code = 1; rf.fp = fp; rf.readable = NULL; rf.depth = 0; @@ -1629,6 +1647,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) { RFILE rf; PyObject *result; + rf.allow_code = 1; rf.fp = NULL; rf.readable = NULL; rf.ptr = str; @@ -1645,8 +1664,8 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) return result; } -PyObject * -PyMarshal_WriteObjectToString(PyObject *x, int version) +static PyObject * +_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code) { WFILE wf; @@ -1661,6 +1680,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str); wf.error = WFERR_OK; wf.version = version; + wf.allow_code = allow_code; if (w_init_refs(&wf, version)) { Py_DECREF(wf.str); return NULL; @@ -1674,17 +1694,35 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) } if (wf.error != WFERR_OK) { Py_XDECREF(wf.str); - if (wf.error == WFERR_NOMEMORY) + switch (wf.error) { + case WFERR_NOMEMORY: PyErr_NoMemory(); - else + break; + case WFERR_NESTEDTOODEEP: PyErr_SetString(PyExc_ValueError, - (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object" - :"object too deeply nested to marshal"); + "object too deeply nested to marshal"); + break; + case WFERR_CODE_NOT_ALLOWED: + PyErr_SetString(PyExc_ValueError, + "marshalling code objects is disallowed"); + break; + default: + case WFERR_UNMARSHALLABLE: + PyErr_SetString(PyExc_ValueError, + "unmarshallable object"); + break; + } return NULL; } return wf.str; } +PyObject * +PyMarshal_WriteObjectToString(PyObject *x, int version) +{ + return _PyMarshal_WriteObjectToString(x, version, 1); +} + /* And an interface for Python programs... */ /*[clinic input] marshal.dump @@ -1696,6 +1734,9 @@ marshal.dump version: int(c_default="Py_MARSHAL_VERSION") = version Indicates the data format that dump should use. / + * + allow_code: bool = True + Allow to write code objects. Write the value on the open file. @@ -1706,14 +1747,14 @@ to the file. The object will not be properly read back by load(). static PyObject * marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file, - int version) -/*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/ + int version, int allow_code) +/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/ { /* XXX Quick hack -- need to do this differently */ PyObject *s; PyObject *res; - s = PyMarshal_WriteObjectToString(value, version); + s = _PyMarshal_WriteObjectToString(value, version, allow_code); if (s == NULL) return NULL; res = PyObject_CallMethodOneArg(file, &_Py_ID(write), s); @@ -1727,6 +1768,9 @@ marshal.load file: object Must be readable binary file. / + * + allow_code: bool = True + Allow to load code objects. Read one value from the open file and return it. @@ -1739,8 +1783,8 @@ dump(), load() will substitute None for the unmarshallable type. [clinic start generated code]*/ static PyObject * -marshal_load(PyObject *module, PyObject *file) -/*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/ +marshal_load_impl(PyObject *module, PyObject *file, int allow_code) +/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/ { PyObject *data, *result; RFILE rf; @@ -1762,6 +1806,7 @@ marshal_load(PyObject *module, PyObject *file) result = NULL; } else { + rf.allow_code = allow_code; rf.depth = 0; rf.fp = NULL; rf.readable = file; @@ -1787,6 +1832,9 @@ marshal.dumps version: int(c_default="Py_MARSHAL_VERSION") = version Indicates the data format that dumps should use. / + * + allow_code: bool = True + Allow to write code objects. Return the bytes object that would be written to a file by dump(value, file). @@ -1795,10 +1843,11 @@ unsupported type. [clinic start generated code]*/ static PyObject * -marshal_dumps_impl(PyObject *module, PyObject *value, int version) -/*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/ +marshal_dumps_impl(PyObject *module, PyObject *value, int version, + int allow_code) +/*[clinic end generated code: output=115f90da518d1d49 input=167eaecceb63f0a8]*/ { - return PyMarshal_WriteObjectToString(value, version); + return _PyMarshal_WriteObjectToString(value, version, allow_code); } /*[clinic input] @@ -1806,6 +1855,9 @@ marshal.loads bytes: Py_buffer / + * + allow_code: bool = True + Allow to load code objects. Convert the bytes-like object to a value. @@ -1814,13 +1866,14 @@ bytes in the input are ignored. [clinic start generated code]*/ static PyObject * -marshal_loads_impl(PyObject *module, Py_buffer *bytes) -/*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/ +marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code) +/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/ { RFILE rf; char *s = bytes->buf; Py_ssize_t n = bytes->len; PyObject* result; + rf.allow_code = allow_code; rf.fp = NULL; rf.readable = NULL; rf.ptr = s;Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: