@@ -42,136 +42,27 @@ module CfgSigImpl implements BB::CfgSig<Py::Location> {
4242/**
4343 * Gets the Python AST node corresponding to CFG node `n`, if any.
4444 *
45- * Entry/exit/synthetic CFG nodes have no Python AST node, so this is
46- * partial.
45+ * Multiple CFG nodes may map to the same AST node (e.g. `TBeforeNode(Call)`
46+ * and `TAstNode(Call)` both map to `Py::Call`). This is a pure translation;
47+ * uniqueness constraints are enforced at the dataflow layer where needed.
4748 */
4849private Py:: AstNode toAst ( CfgImpl:: ControlFlowNode n ) {
4950 result = CfgImpl:: astNodeToPyNode ( n .getAstNode ( ) )
5051}
5152
5253/**
53- * Holds if `n` is a CFG node representing the canonical position for an
54- * AST node from the dataflow library's perspective.
54+ * A control flow node.
5555 *
56- * For most expressions this is the "after"-evaluation point (post-order
57- * representative). For statements it is the post-order node when one
58- * exists. We additionally include the synthetic entry/exit nodes for the
59- * benefit of API consumers that ask "is this the entry node of a scope?" .
56+ * This is the full set of CFG nodes from the shared library — it includes
57+ * before-nodes, in-order/ post-order nodes, after-value-split nodes, and
58+ * entry/exit nodes. This enables full control-flow-level reasoning and
59+ * compatibility with the shared control-flow reachability library .
6060 *
61- * In conditional contexts the after-position of a boolean expression
62- * splits into separate `isAfterTrue` and `isAfterFalse` nodes; both are
63- * canonical, so a single AST expression may correspond to more than one
64- * `ControlFlowNode`.
65- */
66- private predicate isCanonical ( CfgImpl:: ControlFlowNode n ) {
67- n .isAfter ( _)
68- or
69- n instanceof CfgImpl:: ControlFlow:: EntryNode
70- or
71- n instanceof CfgImpl:: ControlFlow:: ExitNode
72- or
73- // Annotated exit nodes (normal + abnormal) — needed so that dataflow
74- // consumers can ask "is this the normal-exit of a scope?" and also
75- // so that scope-exit synthetic uses in SsaImpl can attach here.
76- n instanceof CfgImpl:: ControlFlow:: AnnotatedExitNode
77- }
78-
79- /**
80- * Holds if `n` is genuinely the `TAfterValueNode` variant for a boolean-true
81- * outcome of its AST node.
82- *
83- * The shared CFG's `isAfterValue` predicate has a kind-mismatch fallback
84- * (see `ControlFlowGraph.qll`'s `isAfterValue` lines 870-892): when asking
85- * `isAfterValue(_, BooleanSuccessor true)` on, say, an emptiness-empty
86- * variant, it falsely returns `true` because the kinds differ and Python
87- * does not provide `successorValueImplies`. The same fallback also makes
88- * `TAfterNode` (the unsplit case) satisfy `isAfterValue(_, t)` for *every*
89- * `t`.
90- *
91- * The combination `isAfterTrue ∧ ¬isAfterFalse` excludes both: a genuine
92- * boolean-true variant satisfies `isAfterTrue` directly (newtype branch 3)
93- * but not `isAfterFalse` (the dual variant, same kind, no fallback). A
94- * non-boolean variant satisfies both via the cross-kind fallback. A
95- * `TAfterNode` satisfies both via branch 1. So `isAfterTrue ∧ ¬isAfterFalse`
96- * picks exactly the genuine boolean-true variant.
97- */
98- private predicate isGenuineAfterTrue ( ControlFlowNode n ) {
99- n .isAfterTrue ( _) and not n .isAfterFalse ( _)
100- }
101-
102- /**
103- * Holds if `n` is genuinely the `TAfterValueNode` variant for the "empty"
104- * outcome of its AST node (e.g. `for x in xs: ...` when `xs` is empty).
105- *
106- * See `isGenuineAfterTrue` for why we cannot just use a single
107- * `isAfterValue` check.
108- */
109- private predicate isGenuineAfterEmpty ( ControlFlowNode n ) {
110- exists ( EmptinessSuccessor empty |
111- empty .getValue ( ) = true and n .isAfterValue ( n .getAstNode ( ) , empty )
112- ) and
113- not exists ( EmptinessSuccessor nonEmpty |
114- nonEmpty .getValue ( ) = false and n .isAfterValue ( n .getAstNode ( ) , nonEmpty )
115- )
116- }
117-
118- /**
119- * Holds if `n` is genuinely the `TAfterValueNode` variant for the "matched"
120- * outcome of its AST node (e.g. a `match` case-pattern that matched).
121- *
122- * See `isGenuineAfterTrue` for why we cannot just use a single
123- * `isAfterValue` check.
124- */
125- private predicate isGenuineAfterMatched ( ControlFlowNode n ) {
126- exists ( MatchingSuccessor matched |
127- matched .getValue ( ) = true and n .isAfterValue ( n .getAstNode ( ) , matched )
128- ) and
129- not exists ( MatchingSuccessor unmatched |
130- unmatched .getValue ( ) = false and n .isAfterValue ( n .getAstNode ( ) , unmatched )
131- )
132- }
133-
134- /**
135- * Holds if `n` is the canonical representative of its corresponding AST node
136- * for dataflow purposes.
137- *
138- * The shared CFG associates a single AST node with multiple `ControlFlowNode`s
139- * when the AST appears in a conditional context (boolean conditions split into
140- * `afterTrue`/`afterFalse`; for-loop iters split into `[empty]`/`[non-empty]`;
141- * `match`-case patterns split into `[matched]`/`[unmatched]`). These splits
142- * matter for control-flow analysis, but for dataflow purposes — where we
143- * ask "what is the value of this expression?" — a single representative
144- * suffices and is required to avoid double-counting calls, arguments, store
145- * steps, etc.
146- *
147- * The pick is structural: when an AST has a single `ControlFlowNode` (the
148- * normal `TAfterNode` or `TBeforeNode`-leaf case), that node is canonical.
149- * When an AST has a conditional split, the "positive" outcome variant
150- * (true / empty / matched) is canonical. The three split kinds are mutually
151- * exclusive per AST, so exactly one variant is selected.
152- */
153- predicate isCanonicalAstNodeRepresentative ( ControlFlowNode n ) {
154- // Non-split AST: the unique variant is canonical.
155- not exists ( ControlFlowNode other | other .getNode ( ) = n .getNode ( ) and other != n )
156- or
157- // Split AST: pick the "positive" outcome of the split.
158- isGenuineAfterTrue ( n )
159- or
160- isGenuineAfterEmpty ( n )
161- or
162- isGenuineAfterMatched ( n )
163- }
164-
165- /**
166- * A control flow node. Control flow nodes have a many-to-one relation
167- * with syntactic nodes, although most syntactic nodes have only one
168- * corresponding control flow node.
169- *
170- * Edges between control flow nodes include exceptional as well as normal
171- * control flow.
61+ * AST-level semantics (`getNode()`, `isLoad()`, typed wrappers, etc.)
62+ * are available only on the `injects` (canonical) node for each AST node.
63+ * Non-injects nodes are purely positional CFG nodes with no AST mapping.
17264 */
17365class ControlFlowNode extends CfgImpl:: ControlFlowNode {
174- ControlFlowNode ( ) { isCanonical ( this ) }
17566
17667 /** Gets the syntactic element corresponding to this flow node, if any. */
17768 Py:: AstNode getNode ( ) { result = toAst ( this ) }
@@ -180,36 +71,23 @@ class ControlFlowNode extends CfgImpl::ControlFlowNode {
18071 ControlFlowNode getAPredecessor ( ) { this = result .getASuccessor ( ) }
18172
18273 /** Gets a successor of this flow node. */
183- pragma [ inline]
184- ControlFlowNode getASuccessor ( ) { result = nextCanonical ( this ) }
74+ ControlFlowNode getASuccessor ( ) { result = super .getASuccessor ( ) }
18575
18676 /** Gets a successor for this node if the relevant condition is True. */
18777 ControlFlowNode getATrueSuccessor ( ) {
188- super .isAfterTrue ( _) and
189- exists ( CfgImpl:: ControlFlowNode other | other .isAfterFalse ( super .getAstNode ( ) ) ) and
190- result = nextCanonical ( this )
78+ result = super .getASuccessor ( any ( BooleanSuccessor t | t .getValue ( ) = true ) )
19179 }
19280
19381 /** Gets a successor for this node if the relevant condition is False. */
19482 ControlFlowNode getAFalseSuccessor ( ) {
195- super .isAfterFalse ( _) and
196- exists ( CfgImpl:: ControlFlowNode other | other .isAfterTrue ( super .getAstNode ( ) ) ) and
197- result = nextCanonical ( this )
83+ result = super .getASuccessor ( any ( BooleanSuccessor t | t .getValue ( ) = false ) )
19884 }
19985
20086 /** Gets a successor for this node if an exception is raised. */
201- ControlFlowNode getAnExceptionalSuccessor ( ) {
202- exists ( CfgImpl:: ControlFlowNode mid |
203- mid = super .getAnExceptionSuccessor ( ) and
204- result = nextCanonicalFrom ( mid )
205- )
206- }
87+ ControlFlowNode getAnExceptionalSuccessor ( ) { result = super .getAnExceptionSuccessor ( ) }
20788
20889 /** Gets a successor for this node if no exception is raised. */
209- ControlFlowNode getANormalSuccessor ( ) {
210- result = this .getASuccessor ( ) and
211- not result = this .getAnExceptionalSuccessor ( )
212- }
90+ ControlFlowNode getANormalSuccessor ( ) { result = super .getANormalSuccessor ( ) }
21391
21492 /** Gets the basic block containing this flow node. */
21593 BasicBlock getBasicBlock ( ) { result = super .getBasicBlock ( ) }
@@ -353,9 +231,6 @@ class ControlFlowNode extends CfgImpl::ControlFlowNode {
353231
354232 /** Holds if this flow node strictly reaches `other`. */
355233 predicate strictlyReaches ( ControlFlowNode other ) { this .getASuccessor + ( ) = other }
356-
357- /** Internal: raw successor predicate that does NOT skip non-canonical nodes. */
358- CfgImpl:: ControlFlowNode getASuccessorRaw ( ) { result = super .getASuccessor ( ) }
359234}
360235
361236/**
@@ -377,53 +252,33 @@ private predicate augstore(ControlFlowNode load, ControlFlowNode store) {
377252 load = store
378253}
379254
380- /**
381- * Gets the nearest canonical CFG node reachable from `n` via one or more
382- * raw CFG edges (skipping non-canonical intermediaries).
383- */
384- private CfgImpl:: ControlFlowNode nextCanonicalFrom ( CfgImpl:: ControlFlowNode n ) {
385- result = n .getASuccessor ( ) and isCanonical ( result )
386- or
387- exists ( CfgImpl:: ControlFlowNode mid |
388- mid = n .getASuccessor ( ) and
389- not isCanonical ( mid ) and
390- result = nextCanonicalFrom ( mid )
391- )
392- }
393-
394- /** Gets the nearest canonical CFG successor of canonical node `n`. */
395- private ControlFlowNode nextCanonical ( ControlFlowNode n ) { result = nextCanonicalFrom ( n ) }
396-
397255/**
398256 * A basic block — a maximal-length sequence of control flow nodes such
399257 * that no node except the first has a predecessor outside the sequence,
400258 * and no node except the last has a successor outside the sequence.
401259 */
402260class BasicBlock extends CfgImpl:: BasicBlock {
403- /** Gets the `n`th node in this basic block, restricted to canonical nodes. */
404- ControlFlowNode getNode ( int n ) {
405- result = rank [ n + 1 ] ( ControlFlowNode node , int i | super .getNode ( i ) = node | node order by i )
406- }
261+ /** Gets the `n`th node in this basic block. */
262+ ControlFlowNode getNode ( int n ) { result = super .getNode ( n ) }
407263
408264 /** Gets a node in this basic block. */
409- ControlFlowNode getANode ( ) { result = this .getNode ( _) }
265+ ControlFlowNode getANode ( ) { result = super .getNode ( _) }
410266
411- /** Gets the first canonical node in this basic block. */
267+ /** Gets the first node in this basic block. */
412268 ControlFlowNode firstNode ( ) { result = this .getNode ( 0 ) }
413269
414- /** Gets the last canonical node in this basic block. */
415- ControlFlowNode getLastNode ( ) { result = this . getNode ( max ( int n | exists ( this . getNode ( n ) ) ) ) }
270+ /** Gets the last node in this basic block. */
271+ ControlFlowNode getLastNode ( ) { result = super . getLastNode ( ) }
416272
417273 /** Holds if this basic block contains `node`. */
418274 predicate contains ( ControlFlowNode node ) { node = this .getANode ( ) }
419275
420276 // Inherited from the shared library's `BasicBlock`:
421277 // getASuccessor(), getASuccessor(SuccessorType), getAPredecessor(),
422- // getNode(int) (raw, includes non-canonical), getANode() (raw),
423278 // strictlyDominates(), dominates(), getImmediateDominator(),
424279 // length(), inLoop().
425- // We expose canonical-only positional access via `getNode(int)` below
426- // (shadows the shared-lib version ) and additional Python-style helpers.
280+ // We shadow `getNode(int)` etc. to return `ControlFlowNode` (this
281+ // facade's type ) and add Python-style helpers below .
427282 /** Gets a true successor to this basic block. */
428283 BasicBlock getATrueSuccessor ( ) {
429284 result = super .getASuccessor ( any ( BooleanSuccessor t | t .getValue ( ) = true ) )
@@ -810,8 +665,13 @@ class IfExprNode extends ControlFlowNode {
810665
811666 override Py:: IfExp getNode ( ) { result = super .getNode ( ) }
812667
813- /** Gets the flow node for one of the operands of an if-expression. */
814- ControlFlowNode getAnOperand ( ) { result = this .getAPredecessor ( ) }
668+ /** Gets the flow node for one of the value operands (true-branch or false-branch). */
669+ ControlFlowNode getAnOperand ( ) {
670+ exists ( Py:: IfExp ie |
671+ ie = toAst ( this ) and
672+ ( toAst ( result ) = ie .getBody ( ) or toAst ( result ) = ie .getOrelse ( ) )
673+ )
674+ }
815675}
816676
817677/** A control flow node corresponding to an assignment expression (walrus `:=`). */
0 commit comments