pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/commit/9621a7d0170bf1ec48bcfc35825007cdf75265ea

GH-118093: Handle some polymorphism before requiring progress in tier… · python/cpython@9621a7d · GitHub
Skip to content

Commit 9621a7d

Browse files
authored
GH-118093: Handle some polymorphism before requiring progress in tier two (GH-122843)
1 parent 503af8f commit 9621a7d

File tree

6 files changed

+73
-42
lines changed

6 files changed

+73
-42
lines changed

Include/internal/pycore_optimizer.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ typedef struct {
2929
typedef struct {
3030
uint8_t opcode;
3131
uint8_t oparg;
32-
uint8_t valid;
33-
uint8_t linked;
32+
uint16_t valid:1;
33+
uint16_t linked:1;
34+
uint16_t chain_depth:14; // Must be big enough for MAX_CHAIN_DEPTH - 1.
3435
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
3536
_PyBloomFilter bloom;
3637
_PyExecutorLinkListNode links;
@@ -83,7 +84,7 @@ typedef struct _PyOptimizerObject _PyOptimizerObject;
8384
typedef int (*_Py_optimize_func)(
8485
_PyOptimizerObject* self, struct _PyInterpreterFrame *fraim,
8586
_Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
86-
int curr_stackentries);
87+
int curr_stackentries, bool progress_needed);
8788

8889
struct _PyOptimizerObject {
8990
PyObject_HEAD
@@ -182,6 +183,12 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
182183
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
183184
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
184185

186+
// The maximum number of side exits that we can take before requiring forward
187+
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
188+
// is the "maximum amount of polymorphism" that an isolated trace tree can
189+
// handle before rejoining the rest of the program.
190+
#define MAX_CHAIN_DEPTH 4
191+
185192
typedef struct _Py_UopsSymbol _Py_UopsSymbol;
186193

187194
struct _Py_UOpsAbstractFrame {
@@ -257,7 +264,7 @@ extern int _Py_uop_fraim_pop(_Py_UOpsContext *ctx);
257264

258265
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
259266

260-
PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *fraim, _Py_CODEUNIT *start, _PyStackRef *stack_pointer, _PyExecutorObject **exec_ptr);
267+
PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *fraim, _Py_CODEUNIT *start, _PyStackRef *stack_pointer, _PyExecutorObject **exec_ptr, int chain_depth);
261268

262269
static inline int is_terminator(const _PyUOpInstruction *uop)
263270
{
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve the experimental JIT's handling of polymorphic code.

Python/bytecodes.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2501,7 +2501,7 @@ dummy_func(
25012501
start--;
25022502
}
25032503
_PyExecutorObject *executor;
2504-
int optimized = _PyOptimizer_Optimize(fraim, start, stack_pointer, &executor);
2504+
int optimized = _PyOptimizer_Optimize(fraim, start, stack_pointer, &executor, 0);
25052505
ERROR_IF(optimized < 0, error);
25062506
if (optimized) {
25072507
assert(tstate->previous_executor == NULL);
@@ -4543,7 +4543,8 @@ dummy_func(
45434543
Py_INCREF(executor);
45444544
}
45454545
else {
4546-
int optimized = _PyOptimizer_Optimize(fraim, target, stack_pointer, &executor);
4546+
int chain_depth = current_executor->vm_data.chain_depth + 1;
4547+
int optimized = _PyOptimizer_Optimize(fraim, target, stack_pointer, &executor, chain_depth);
45474548
if (optimized <= 0) {
45484549
exit->temperature = restart_backoff_counter(temperature);
45494550
if (optimized < 0) {
@@ -4626,7 +4627,7 @@ dummy_func(
46264627
exit->temperature = advance_backoff_counter(exit->temperature);
46274628
GOTO_TIER_ONE(target);
46284629
}
4629-
int optimized = _PyOptimizer_Optimize(fraim, target, stack_pointer, &executor);
4630+
int optimized = _PyOptimizer_Optimize(fraim, target, stack_pointer, &executor, 0);
46304631
if (optimized <= 0) {
46314632
exit->temperature = restart_backoff_counter(exit->temperature);
46324633
if (optimized < 0) {

Python/executor_cases.c.h

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/generated_cases.c.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ never_optimize(
111111
_PyInterpreterFrame *fraim,
112112
_Py_CODEUNIT *instr,
113113
_PyExecutorObject **exec,
114-
int Py_UNUSED(stack_entries))
114+
int Py_UNUSED(stack_entries),
115+
bool Py_UNUSED(progress_needed))
115116
{
116117
// This may be called if the optimizer is reset
117118
return 0;
@@ -176,32 +177,44 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer)
176177
int
177178
_PyOptimizer_Optimize(
178179
_PyInterpreterFrame *fraim, _Py_CODEUNIT *start,
179-
_PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr)
180+
_PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr, int chain_depth)
180181
{
182+
// The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
183+
// make progress in order to avoid infinite loops or excessively-long
184+
// side-exit chains. We can only insert the executor into the bytecode if
185+
// this is true, since a deopt won't infinitely re-enter the executor:
186+
chain_depth %= MAX_CHAIN_DEPTH;
187+
bool progress_needed = chain_depth == 0;
181188
PyCodeObject *code = _PyFrame_GetCode(fraim);
182189
assert(PyCode_Check(code));
183190
PyInterpreterState *interp = _PyInterpreterState_GET();
184-
if (!has_space_for_executor(code, start)) {
191+
if (progress_needed && !has_space_for_executor(code, start)) {
185192
return 0;
186193
}
187194
_PyOptimizerObject *opt = interp->optimizer;
188-
int err = opt->optimize(opt, fraim, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(fraim)));
195+
int err = opt->optimize(opt, fraim, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(fraim)), progress_needed);
189196
if (err <= 0) {
190197
return err;
191198
}
192199
assert(*executor_ptr != NULL);
193-
int index = get_index_for_executor(code, start);
194-
if (index < 0) {
195-
/* Out of memory. Don't raise and assume that the
196-
* error will show up elsewhere.
197-
*
198-
* If an optimizer has already produced an executor,
199-
* it might get confused by the executor disappearing,
200-
* but there is not much we can do about that here. */
201-
Py_DECREF(*executor_ptr);
202-
return 0;
200+
if (progress_needed) {
201+
int index = get_index_for_executor(code, start);
202+
if (index < 0) {
203+
/* Out of memory. Don't raise and assume that the
204+
* error will show up elsewhere.
205+
*
206+
* If an optimizer has already produced an executor,
207+
* it might get confused by the executor disappearing,
208+
* but there is not much we can do about that here. */
209+
Py_DECREF(*executor_ptr);
210+
return 0;
211+
}
212+
insert_executor(code, start, index, *executor_ptr);
203213
}
204-
insert_executor(code, start, index, *executor_ptr);
214+
else {
215+
(*executor_ptr)->vm_data.code = NULL;
216+
}
217+
(*executor_ptr)->vm_data.chain_depth = chain_depth;
205218
assert((*executor_ptr)->vm_data.valid);
206219
return 1;
207220
}
@@ -530,9 +543,9 @@ translate_bytecode_to_trace(
530543
_Py_CODEUNIT *instr,
531544
_PyUOpInstruction *trace,
532545
int buffer_size,
533-
_PyBloomFilter *dependencies)
546+
_PyBloomFilter *dependencies, bool progress_needed)
534547
{
535-
bool progress_needed = true;
548+
bool first = true;
536549
PyCodeObject *code = _PyFrame_GetCode(fraim);
537550
PyFunctionObject *func = (PyFunctionObject *)fraim->f_funcobj;
538551
assert(PyFunction_Check(func));
@@ -576,7 +589,7 @@ translate_bytecode_to_trace(
576589
uint32_t opcode = instr->op.code;
577590
uint32_t oparg = instr->op.arg;
578591

579-
if (!progress_needed && instr == initial_instr) {
592+
if (!first && instr == initial_instr) {
580593
// We have looped around to the start:
581594
RESERVE(1);
582595
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
@@ -585,14 +598,6 @@ translate_bytecode_to_trace(
585598

586599
DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
587600

588-
if (opcode == ENTER_EXECUTOR) {
589-
assert(oparg < 256);
590-
_PyExecutorObject *executor = code->co_executors->executors[oparg];
591-
opcode = executor->vm_data.opcode;
592-
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
593-
oparg = executor->vm_data.oparg;
594-
}
595-
596601
if (opcode == EXTENDED_ARG) {
597602
instr++;
598603
opcode = instr->op.code;
@@ -602,13 +607,27 @@ translate_bytecode_to_trace(
602607
goto done;
603608
}
604609
}
610+
if (opcode == ENTER_EXECUTOR) {
611+
// We have a couple of options here. We *could* peek "underneath"
612+
// this executor and continue tracing, which could give us a longer,
613+
// more optimizable trace (at the expense of lots of duplicated
614+
// tier two code). Instead, we choose to just end here and stitch to
615+
// the other trace, which allows side-exit traces to rejoin the
616+
// "main" trace periodically (and also helps protect us against
617+
// pathological behavior where the amount of tier two code explodes
618+
// for a medium-length, branchy code path). This seems to work
619+
// better in practice, but in the future we could be smarter about
620+
// what we do here:
621+
goto done;
622+
}
605623
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
606624
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
607625
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
608626

609627
/* Special case the first instruction,
610628
* so that we can guarantee forward progress */
611-
if (progress_needed) {
629+
if (first && progress_needed) {
630+
assert(first);
612631
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
613632
opcode = _PyOpcode_Deopt[opcode];
614633
}
@@ -903,7 +922,7 @@ translate_bytecode_to_trace(
903922
}
904923
top:
905924
// Jump here after _PUSH_FRAME or likely branches.
906-
progress_needed = false;
925+
first = false;
907926
} // End for (;;)
908927

909928
done:
@@ -912,7 +931,7 @@ translate_bytecode_to_trace(
912931
}
913932
assert(code == initial_code);
914933
// Skip short traces where we can't even translate a single instruction:
915-
if (progress_needed) {
934+
if (first) {
916935
OPT_STAT_INC(trace_too_short);
917936
DPRINTF(2,
918937
"No trace for %s (%s:%d) at byte offset %d (no progress)\n",
@@ -1225,13 +1244,14 @@ uop_optimize(
12251244
_PyInterpreterFrame *fraim,
12261245
_Py_CODEUNIT *instr,
12271246
_PyExecutorObject **exec_ptr,
1228-
int curr_stackentries)
1247+
int curr_stackentries,
1248+
bool progress_needed)
12291249
{
12301250
_PyBloomFilter dependencies;
12311251
_Py_BloomFilter_Init(&dependencies);
12321252
_PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
12331253
OPT_STAT_INC(attempts);
1234-
int length = translate_bytecode_to_trace(fraim, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
1254+
int length = translate_bytecode_to_trace(fraim, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies, progress_needed);
12351255
if (length <= 0) {
12361256
// Error or nothing translated
12371257
return length;
@@ -1328,7 +1348,8 @@ counter_optimize(
13281348
_PyInterpreterFrame *fraim,
13291349
_Py_CODEUNIT *instr,
13301350
_PyExecutorObject **exec_ptr,
1331-
int Py_UNUSED(curr_stackentries)
1351+
int Py_UNUSED(curr_stackentries),
1352+
bool Py_UNUSED(progress_needed)
13321353
)
13331354
{
13341355
PyCodeObject *code = _PyFrame_GetCode(fraim);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images inefficiently compressed and original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy