/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
2217683Spst */ 2317683Spst#ifndef lint 24127664Sbmsstatic const char rcsid[] _U_ = 25214518Srpaulo "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp $ (LBL)"; 2617683Spst#endif 2717683Spst 2875107Sfenner#ifdef HAVE_CONFIG_H 2975107Sfenner#include "config.h" 3075107Sfenner#endif 3175107Sfenner 32214518Srpaulo#ifdef WIN32 33214518Srpaulo#include <pcap-stdinc.h> 34214518Srpaulo#else /* WIN32 */ 35214518Srpaulo#if HAVE_INTTYPES_H 36214518Srpaulo#include <inttypes.h> 37214518Srpaulo#elif HAVE_STDINT_H 38214518Srpaulo#include <stdint.h> 39214518Srpaulo#endif 40214518Srpaulo#ifdef HAVE_SYS_BITYPES_H 41214518Srpaulo#include <sys/bitypes.h> 42214518Srpaulo#endif 43214518Srpaulo#include <sys/types.h> 44214518Srpaulo#endif /* WIN32 */ 45214518Srpaulo 4617683Spst#include <stdio.h> 4717683Spst#include <stdlib.h> 4817683Spst#include <memory.h> 49146768Ssam#include <string.h> 5017683Spst 5175107Sfenner#include <errno.h> 5275107Sfenner 5317683Spst#include "pcap-int.h" 5417683Spst 5517683Spst#include "gencode.h" 5617683Spst 5717683Spst#ifdef HAVE_OS_PROTO_H 5817683Spst#include "os-proto.h" 5917683Spst#endif 6017683Spst 6117683Spst#ifdef BDEBUG 6217683Spstextern int dflag; 6317683Spst#endif 6417683Spst 65146768Ssam#if defined(MSDOS) && !defined(__DJGPP__) 66146768Ssamextern int _w32_ffs (int mask); 67146768Ssam#define ffs _w32_ffs 68146768Ssam#endif 6917683Spst 70190225Srpaulo#if defined(WIN32) && defined (_MSC_VER) 71190225Srpauloint ffs(int mask); 72190225Srpaulo#endif 73190225Srpaulo 74146768Ssam/* 75146768Ssam * Represents a deleted instruction. 76146768Ssam */ 7717683Spst#define NOP -1 7817683Spst 7917683Spst/* 80146768Ssam * Register numbers for use-def values. 81146768Ssam * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory 82146768Ssam * location. A_ATOM is the accumulator and X_ATOM is the index 83146768Ssam * register. 
84146768Ssam */ 85146768Ssam#define A_ATOM BPF_MEMWORDS 86146768Ssam#define X_ATOM (BPF_MEMWORDS+1) 87146768Ssam 88146768Ssam/* 8917683Spst * This define is used to represent *both* the accumulator and 9017683Spst * x register in use-def computations. 9117683Spst * Currently, the use-def code assumes only one definition per instruction. 9217683Spst */ 9317683Spst#define AX_ATOM N_ATOMS 9417683Spst 9517683Spst/* 9617683Spst * A flag to indicate that further optimization is needed. 9717683Spst * Iterative passes are continued until a given pass yields no 9817683Spst * branch movement. 9917683Spst */ 10017683Spststatic int done; 10117683Spst 10217683Spst/* 10317683Spst * A block is marked if only if its mark equals the current mark. 10417683Spst * Rather than traverse the code array, marking each item, 'cur_mark' is 10517683Spst * incremented. This automatically makes each element unmarked. 10617683Spst */ 10717683Spststatic int cur_mark; 10817683Spst#define isMarked(p) ((p)->mark == cur_mark) 10917683Spst#define unMarkAll() cur_mark += 1 11017683Spst#define Mark(p) ((p)->mark = cur_mark) 11117683Spst 11217683Spststatic void opt_init(struct block *); 11317683Spststatic void opt_cleanup(void); 11417683Spst 11517683Spststatic void intern_blocks(struct block *); 11617683Spst 11717683Spststatic void find_inedges(struct block *); 11817683Spst#ifdef BDEBUG 11917683Spststatic void opt_dump(struct block *); 12017683Spst#endif 12117683Spst 12217683Spststatic int n_blocks; 12317683Spststruct block **blocks; 12417683Spststatic int n_edges; 12517683Spststruct edge **edges; 12617683Spst 12717683Spst/* 12817683Spst * A bit vector set representation of the dominators. 12917683Spst * We round up the set size to the next power of two. 
13017683Spst */ 13117683Spststatic int nodewords; 13217683Spststatic int edgewords; 13317683Spststruct block **levels; 13417683Spstbpf_u_int32 *space; 13517683Spst#define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 13617683Spst/* 13717683Spst * True if a is in uset {p} 13817683Spst */ 13917683Spst#define SET_MEMBER(p, a) \ 14017683Spst((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 14117683Spst 14217683Spst/* 14317683Spst * Add 'a' to uset p. 14417683Spst */ 14517683Spst#define SET_INSERT(p, a) \ 14617683Spst(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 14717683Spst 14817683Spst/* 14917683Spst * Delete 'a' from uset p. 15017683Spst */ 15117683Spst#define SET_DELETE(p, a) \ 15217683Spst(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 15317683Spst 15417683Spst/* 15517683Spst * a := a intersect b 15617683Spst */ 15717683Spst#define SET_INTERSECT(a, b, n)\ 15817683Spst{\ 15917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 16017683Spst register int _n = n;\ 16117683Spst while (--_n >= 0) *_x++ &= *_y++;\ 16217683Spst} 16317683Spst 16417683Spst/* 16517683Spst * a := a - b 16617683Spst */ 16717683Spst#define SET_SUBTRACT(a, b, n)\ 16817683Spst{\ 16917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 17017683Spst register int _n = n;\ 17117683Spst while (--_n >= 0) *_x++ &=~ *_y++;\ 17217683Spst} 17317683Spst 17417683Spst/* 17517683Spst * a := a union b 17617683Spst */ 17717683Spst#define SET_UNION(a, b, n)\ 17817683Spst{\ 17917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 18017683Spst register int _n = n;\ 18117683Spst while (--_n >= 0) *_x++ |= *_y++;\ 18217683Spst} 18317683Spst 18417683Spststatic uset all_dom_sets; 18517683Spststatic uset all_closure_sets; 18617683Spststatic uset all_edge_sets; 18717683Spst 18817683Spst#ifndef MAX 18917683Spst#define MAX(a,b) ((a)>(b)?(a):(b)) 19017683Spst#endif 19117683Spst 19217683Spststatic void 193251129Sdelphijfind_levels_r(struct block *b) 
19417683Spst{ 19517683Spst int level; 19617683Spst 19717683Spst if (isMarked(b)) 19817683Spst return; 19917683Spst 20017683Spst Mark(b); 20117683Spst b->link = 0; 20217683Spst 20317683Spst if (JT(b)) { 20417683Spst find_levels_r(JT(b)); 20517683Spst find_levels_r(JF(b)); 20617683Spst level = MAX(JT(b)->level, JF(b)->level) + 1; 20717683Spst } else 20817683Spst level = 0; 20917683Spst b->level = level; 21017683Spst b->link = levels[level]; 21117683Spst levels[level] = b; 21217683Spst} 21317683Spst 21417683Spst/* 21517683Spst * Level graph. The levels go from 0 at the leaves to 21617683Spst * N_LEVELS at the root. The levels[] array points to the 21717683Spst * first node of the level list, whose elements are linked 21817683Spst * with the 'link' field of the struct block. 21917683Spst */ 22017683Spststatic void 221251129Sdelphijfind_levels(struct block *root) 22217683Spst{ 22317683Spst memset((char *)levels, 0, n_blocks * sizeof(*levels)); 22417683Spst unMarkAll(); 22517683Spst find_levels_r(root); 22617683Spst} 22717683Spst 22817683Spst/* 22917683Spst * Find dominator relationships. 23017683Spst * Assumes graph has been leveled. 23117683Spst */ 23217683Spststatic void 233251129Sdelphijfind_dom(struct block *root) 23417683Spst{ 23517683Spst int i; 23617683Spst struct block *b; 23717683Spst bpf_u_int32 *x; 23817683Spst 23917683Spst /* 24017683Spst * Initialize sets to contain all nodes. 24117683Spst */ 24217683Spst x = all_dom_sets; 24317683Spst i = n_blocks * nodewords; 24417683Spst while (--i >= 0) 24517683Spst *x++ = ~0; 24617683Spst /* Root starts off empty. */ 24717683Spst for (i = nodewords; --i >= 0;) 24817683Spst root->dom[i] = 0; 24917683Spst 25017683Spst /* root->level is the highest level no found. 
*/ 25117683Spst for (i = root->level; i >= 0; --i) { 25217683Spst for (b = levels[i]; b; b = b->link) { 25317683Spst SET_INSERT(b->dom, b->id); 25417683Spst if (JT(b) == 0) 25517683Spst continue; 25617683Spst SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 25717683Spst SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 25817683Spst } 25917683Spst } 26017683Spst} 26117683Spst 26217683Spststatic void 263251129Sdelphijpropedom(struct edge *ep) 26417683Spst{ 26517683Spst SET_INSERT(ep->edom, ep->id); 26617683Spst if (ep->succ) { 26717683Spst SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 26817683Spst SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 26917683Spst } 27017683Spst} 27117683Spst 27217683Spst/* 27317683Spst * Compute edge dominators. 27417683Spst * Assumes graph has been leveled and predecessors established. 27517683Spst */ 27617683Spststatic void 277251129Sdelphijfind_edom(struct block *root) 27817683Spst{ 27917683Spst int i; 28017683Spst uset x; 28117683Spst struct block *b; 28217683Spst 28317683Spst x = all_edge_sets; 28417683Spst for (i = n_edges * edgewords; --i >= 0; ) 28517683Spst x[i] = ~0; 28617683Spst 28717683Spst /* root->level is the highest level no found. */ 28817683Spst memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 28917683Spst memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 29017683Spst for (i = root->level; i >= 0; --i) { 29117683Spst for (b = levels[i]; b != 0; b = b->link) { 29217683Spst propedom(&b->et); 29317683Spst propedom(&b->ef); 29417683Spst } 29517683Spst } 29617683Spst} 29717683Spst 29817683Spst/* 29917683Spst * Find the backwards transitive closure of the flow graph. These sets 30017683Spst * are backwards in the sense that we find the set of nodes that reach 30117683Spst * a given node, not the set of nodes that can be reached by a node. 30217683Spst * 30317683Spst * Assumes graph has been leveled. 
30417683Spst */ 30517683Spststatic void 306251129Sdelphijfind_closure(struct block *root) 30717683Spst{ 30817683Spst int i; 30917683Spst struct block *b; 31017683Spst 31117683Spst /* 31217683Spst * Initialize sets to contain no nodes. 31317683Spst */ 31417683Spst memset((char *)all_closure_sets, 0, 31517683Spst n_blocks * nodewords * sizeof(*all_closure_sets)); 31617683Spst 31717683Spst /* root->level is the highest level no found. */ 31817683Spst for (i = root->level; i >= 0; --i) { 31917683Spst for (b = levels[i]; b; b = b->link) { 32017683Spst SET_INSERT(b->closure, b->id); 32117683Spst if (JT(b) == 0) 32217683Spst continue; 32317683Spst SET_UNION(JT(b)->closure, b->closure, nodewords); 32417683Spst SET_UNION(JF(b)->closure, b->closure, nodewords); 32517683Spst } 32617683Spst } 32717683Spst} 32817683Spst 32917683Spst/* 33017683Spst * Return the register number that is used by s. If A and X are both 33117683Spst * used, return AX_ATOM. If no register is used, return -1. 33217683Spst * 33317683Spst * The implementation should probably change to an array access. 33417683Spst */ 33517683Spststatic int 336251129Sdelphijatomuse(struct stmt *s) 33717683Spst{ 33817683Spst register int c = s->code; 33917683Spst 34017683Spst if (c == NOP) 34117683Spst return -1; 34217683Spst 34317683Spst switch (BPF_CLASS(c)) { 34417683Spst 34517683Spst case BPF_RET: 34617683Spst return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 34717683Spst (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 34817683Spst 34917683Spst case BPF_LD: 35017683Spst case BPF_LDX: 35117683Spst return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 35217683Spst (BPF_MODE(c) == BPF_MEM) ? 
s->k : -1; 35317683Spst 35417683Spst case BPF_ST: 35517683Spst return A_ATOM; 35617683Spst 35717683Spst case BPF_STX: 35817683Spst return X_ATOM; 35917683Spst 36017683Spst case BPF_JMP: 36117683Spst case BPF_ALU: 36217683Spst if (BPF_SRC(c) == BPF_X) 36317683Spst return AX_ATOM; 36417683Spst return A_ATOM; 36517683Spst 36617683Spst case BPF_MISC: 36717683Spst return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 36817683Spst } 36917683Spst abort(); 37017683Spst /* NOTREACHED */ 37117683Spst} 37217683Spst 37317683Spst/* 37417683Spst * Return the register number that is defined by 's'. We assume that 37517683Spst * a single stmt cannot define more than one register. If no register 37617683Spst * is defined, return -1. 37717683Spst * 37817683Spst * The implementation should probably change to an array access. 37917683Spst */ 38017683Spststatic int 381251129Sdelphijatomdef(struct stmt *s) 38217683Spst{ 38317683Spst if (s->code == NOP) 38417683Spst return -1; 38517683Spst 38617683Spst switch (BPF_CLASS(s->code)) { 38717683Spst 38817683Spst case BPF_LD: 38917683Spst case BPF_ALU: 39017683Spst return A_ATOM; 39117683Spst 39217683Spst case BPF_LDX: 39317683Spst return X_ATOM; 39417683Spst 39517683Spst case BPF_ST: 39617683Spst case BPF_STX: 39717683Spst return s->k; 39817683Spst 39917683Spst case BPF_MISC: 40017683Spst return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 40117683Spst } 40217683Spst return -1; 40317683Spst} 40417683Spst 405146768Ssam/* 406146768Ssam * Compute the sets of registers used, defined, and killed by 'b'. 407146768Ssam * 408146768Ssam * "Used" means that a statement in 'b' uses the register before any 409146768Ssam * statement in 'b' defines it, i.e. it uses the value left in 410146768Ssam * that register by a predecessor block of this block. 411146768Ssam * "Defined" means that a statement in 'b' defines it. 412146768Ssam * "Killed" means that a statement in 'b' defines it before any 413146768Ssam * statement in 'b' uses it, i.e. 
it kills the value left in that 414146768Ssam * register by a predecessor block of this block. 415146768Ssam */ 41617683Spststatic void 417251129Sdelphijcompute_local_ud(struct block *b) 41817683Spst{ 41917683Spst struct slist *s; 42017683Spst atomset def = 0, use = 0, kill = 0; 42117683Spst int atom; 42217683Spst 42317683Spst for (s = b->stmts; s; s = s->next) { 42417683Spst if (s->s.code == NOP) 42517683Spst continue; 42617683Spst atom = atomuse(&s->s); 42717683Spst if (atom >= 0) { 42817683Spst if (atom == AX_ATOM) { 42917683Spst if (!ATOMELEM(def, X_ATOM)) 43017683Spst use |= ATOMMASK(X_ATOM); 43117683Spst if (!ATOMELEM(def, A_ATOM)) 43217683Spst use |= ATOMMASK(A_ATOM); 43317683Spst } 43417683Spst else if (atom < N_ATOMS) { 43517683Spst if (!ATOMELEM(def, atom)) 43617683Spst use |= ATOMMASK(atom); 43717683Spst } 43817683Spst else 43917683Spst abort(); 44017683Spst } 44117683Spst atom = atomdef(&s->s); 44217683Spst if (atom >= 0) { 44317683Spst if (!ATOMELEM(use, atom)) 44417683Spst kill |= ATOMMASK(atom); 44517683Spst def |= ATOMMASK(atom); 44617683Spst } 44717683Spst } 448146768Ssam if (BPF_CLASS(b->s.code) == BPF_JMP) { 449146768Ssam /* 450146768Ssam * XXX - what about RET? 451146768Ssam */ 452146768Ssam atom = atomuse(&b->s); 453146768Ssam if (atom >= 0) { 454146768Ssam if (atom == AX_ATOM) { 455146768Ssam if (!ATOMELEM(def, X_ATOM)) 456146768Ssam use |= ATOMMASK(X_ATOM); 457146768Ssam if (!ATOMELEM(def, A_ATOM)) 458146768Ssam use |= ATOMMASK(A_ATOM); 459146768Ssam } 460146768Ssam else if (atom < N_ATOMS) { 461146768Ssam if (!ATOMELEM(def, atom)) 462146768Ssam use |= ATOMMASK(atom); 463146768Ssam } 464146768Ssam else 465146768Ssam abort(); 466146768Ssam } 467146768Ssam } 46817683Spst 46917683Spst b->def = def; 47017683Spst b->kill = kill; 47117683Spst b->in_use = use; 47217683Spst} 47317683Spst 47417683Spst/* 47517683Spst * Assume graph is already leveled. 
47617683Spst */ 47717683Spststatic void 478251129Sdelphijfind_ud(struct block *root) 47917683Spst{ 48017683Spst int i, maxlevel; 48117683Spst struct block *p; 48217683Spst 48317683Spst /* 48417683Spst * root->level is the highest level no found; 48517683Spst * count down from there. 48617683Spst */ 48717683Spst maxlevel = root->level; 48817683Spst for (i = maxlevel; i >= 0; --i) 48917683Spst for (p = levels[i]; p; p = p->link) { 49017683Spst compute_local_ud(p); 49117683Spst p->out_use = 0; 49217683Spst } 49317683Spst 49417683Spst for (i = 1; i <= maxlevel; ++i) { 49517683Spst for (p = levels[i]; p; p = p->link) { 49617683Spst p->out_use |= JT(p)->in_use | JF(p)->in_use; 49717683Spst p->in_use |= p->out_use &~ p->kill; 49817683Spst } 49917683Spst } 50017683Spst} 50117683Spst 50217683Spst/* 50317683Spst * These data structures are used in a Cocke and Shwarz style 50417683Spst * value numbering scheme. Since the flowgraph is acyclic, 50517683Spst * exit values can be propagated from a node's predecessors 50617683Spst * provided it is uniquely defined. 50717683Spst */ 50817683Spststruct valnode { 50917683Spst int code; 51017683Spst int v0, v1; 51117683Spst int val; 51217683Spst struct valnode *next; 51317683Spst}; 51417683Spst 51517683Spst#define MODULUS 213 51617683Spststatic struct valnode *hashtbl[MODULUS]; 51717683Spststatic int curval; 51817683Spststatic int maxval; 51917683Spst 52017683Spst/* Integer constants mapped with the load immediate opcode. 
*/ 52117683Spst#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 52217683Spst 52317683Spststruct vmapinfo { 52417683Spst int is_const; 52517683Spst bpf_int32 const_val; 52617683Spst}; 52717683Spst 52817683Spststruct vmapinfo *vmap; 52917683Spststruct valnode *vnode_base; 53017683Spststruct valnode *next_vnode; 53117683Spst 53217683Spststatic void 533251129Sdelphijinit_val(void) 53417683Spst{ 53517683Spst curval = 0; 53617683Spst next_vnode = vnode_base; 53717683Spst memset((char *)vmap, 0, maxval * sizeof(*vmap)); 53817683Spst memset((char *)hashtbl, 0, sizeof hashtbl); 53917683Spst} 54017683Spst 54117683Spst/* Because we really don't have an IR, this stuff is a little messy. */ 54217683Spststatic int 543251129SdelphijF(int code, int v0, int v1) 54417683Spst{ 54517683Spst u_int hash; 54617683Spst int val; 54717683Spst struct valnode *p; 54817683Spst 54917683Spst hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 55017683Spst hash %= MODULUS; 55117683Spst 55217683Spst for (p = hashtbl[hash]; p; p = p->next) 55317683Spst if (p->code == code && p->v0 == v0 && p->v1 == v1) 55417683Spst return p->val; 55517683Spst 55617683Spst val = ++curval; 55717683Spst if (BPF_MODE(code) == BPF_IMM && 55817683Spst (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 55917683Spst vmap[val].const_val = v0; 56017683Spst vmap[val].is_const = 1; 56117683Spst } 56217683Spst p = next_vnode++; 56317683Spst p->val = val; 56417683Spst p->code = code; 56517683Spst p->v0 = v0; 56617683Spst p->v1 = v1; 56717683Spst p->next = hashtbl[hash]; 56817683Spst hashtbl[hash] = p; 56917683Spst 57017683Spst return val; 57117683Spst} 57217683Spst 57317683Spststatic inline void 574251129Sdelphijvstore(struct stmt *s, int *valp, int newval, int alter) 57517683Spst{ 57617683Spst if (alter && *valp == newval) 57717683Spst s->code = NOP; 57817683Spst else 57917683Spst *valp = newval; 58017683Spst} 58117683Spst 58217683Spststatic void 583251129Sdelphijfold_op(struct stmt *s, int v0, int v1) 58417683Spst{ 
585172677Smlaier bpf_u_int32 a, b; 58617683Spst 58717683Spst a = vmap[v0].const_val; 58817683Spst b = vmap[v1].const_val; 58917683Spst 59017683Spst switch (BPF_OP(s->code)) { 59117683Spst case BPF_ADD: 59217683Spst a += b; 59317683Spst break; 59417683Spst 59517683Spst case BPF_SUB: 59617683Spst a -= b; 59717683Spst break; 59817683Spst 59917683Spst case BPF_MUL: 60017683Spst a *= b; 60117683Spst break; 60217683Spst 60317683Spst case BPF_DIV: 60417683Spst if (b == 0) 60517683Spst bpf_error("division by zero"); 60617683Spst a /= b; 60717683Spst break; 60817683Spst 60917683Spst case BPF_AND: 61017683Spst a &= b; 61117683Spst break; 61217683Spst 61317683Spst case BPF_OR: 61417683Spst a |= b; 61517683Spst break; 61617683Spst 61717683Spst case BPF_LSH: 61817683Spst a <<= b; 61917683Spst break; 62017683Spst 62117683Spst case BPF_RSH: 62217683Spst a >>= b; 62317683Spst break; 62417683Spst 62517683Spst case BPF_NEG: 62617683Spst a = -a; 62717683Spst break; 62817683Spst 62917683Spst default: 63017683Spst abort(); 63117683Spst } 63217683Spst s->k = a; 63317683Spst s->code = BPF_LD|BPF_IMM; 63417683Spst done = 0; 63517683Spst} 63617683Spst 63717683Spststatic inline struct slist * 638251129Sdelphijthis_op(struct slist *s) 63917683Spst{ 64017683Spst while (s != 0 && s->s.code == NOP) 64117683Spst s = s->next; 64217683Spst return s; 64317683Spst} 64417683Spst 64517683Spststatic void 646251129Sdelphijopt_not(struct block *b) 64717683Spst{ 64817683Spst struct block *tmp = JT(b); 64917683Spst 65017683Spst JT(b) = JF(b); 65117683Spst JF(b) = tmp; 65217683Spst} 65317683Spst 65417683Spststatic void 655251129Sdelphijopt_peep(struct block *b) 65617683Spst{ 65717683Spst struct slist *s; 65817683Spst struct slist *next, *last; 65917683Spst int val; 66017683Spst 66117683Spst s = b->stmts; 66217683Spst if (s == 0) 66317683Spst return; 66417683Spst 66517683Spst last = s; 666146768Ssam for (/*empty*/; /*empty*/; s = next) { 667146768Ssam /* 668146768Ssam * Skip over nops. 
669146768Ssam */ 67017683Spst s = this_op(s); 67117683Spst if (s == 0) 672146768Ssam break; /* nothing left in the block */ 673146768Ssam 674146768Ssam /* 675146768Ssam * Find the next real instruction after that one 676146768Ssam * (skipping nops). 677146768Ssam */ 67817683Spst next = this_op(s->next); 67917683Spst if (next == 0) 680146768Ssam break; /* no next instruction */ 68117683Spst last = next; 68217683Spst 68317683Spst /* 68417683Spst * st M[k] --> st M[k] 68517683Spst * ldx M[k] tax 68617683Spst */ 68717683Spst if (s->s.code == BPF_ST && 68817683Spst next->s.code == (BPF_LDX|BPF_MEM) && 68917683Spst s->s.k == next->s.k) { 69017683Spst done = 0; 69117683Spst next->s.code = BPF_MISC|BPF_TAX; 69217683Spst } 69317683Spst /* 69417683Spst * ld #k --> ldx #k 69517683Spst * tax txa 69617683Spst */ 69717683Spst if (s->s.code == (BPF_LD|BPF_IMM) && 69817683Spst next->s.code == (BPF_MISC|BPF_TAX)) { 69917683Spst s->s.code = BPF_LDX|BPF_IMM; 70017683Spst next->s.code = BPF_MISC|BPF_TXA; 70117683Spst done = 0; 70217683Spst } 70317683Spst /* 70417683Spst * This is an ugly special case, but it happens 70517683Spst * when you say tcp[k] or udp[k] where k is a constant. 70617683Spst */ 70717683Spst if (s->s.code == (BPF_LD|BPF_IMM)) { 70817683Spst struct slist *add, *tax, *ild; 70917683Spst 71017683Spst /* 71117683Spst * Check that X isn't used on exit from this 71217683Spst * block (which the optimizer might cause). 71317683Spst * We know the code generator won't generate 71417683Spst * any local dependencies. 71517683Spst */ 71617683Spst if (ATOMELEM(b->out_use, X_ATOM)) 717146768Ssam continue; 71817683Spst 719146768Ssam /* 720146768Ssam * Check that the instruction following the ldi 721146768Ssam * is an addx, or it's an ldxms with an addx 722146768Ssam * following it (with 0 or more nops between the 723146768Ssam * ldxms and addx). 
724146768Ssam */ 72517683Spst if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 72617683Spst add = next; 72717683Spst else 72817683Spst add = this_op(next->next); 72917683Spst if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 730146768Ssam continue; 73117683Spst 732146768Ssam /* 733146768Ssam * Check that a tax follows that (with 0 or more 734146768Ssam * nops between them). 735146768Ssam */ 73617683Spst tax = this_op(add->next); 73717683Spst if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 738146768Ssam continue; 73917683Spst 740146768Ssam /* 741146768Ssam * Check that an ild follows that (with 0 or more 742146768Ssam * nops between them). 743146768Ssam */ 74417683Spst ild = this_op(tax->next); 74517683Spst if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 74617683Spst BPF_MODE(ild->s.code) != BPF_IND) 747146768Ssam continue; 74817683Spst /* 74917683Spst * We want to turn this sequence: 75017683Spst * 75117683Spst * (004) ldi #0x2 {s} 75217683Spst * (005) ldxms [14] {next} -- optional 75317683Spst * (006) addx {add} 75417683Spst * (007) tax {tax} 75517683Spst * (008) ild [x+0] {ild} 75617683Spst * 75717683Spst * into this sequence: 75817683Spst * 75917683Spst * (004) nop 76017683Spst * (005) ldxms [14] 76117683Spst * (006) nop 76217683Spst * (007) nop 76317683Spst * (008) ild [x+2] 76417683Spst * 765146768Ssam * XXX We need to check that X is not 766146768Ssam * subsequently used, because we want to change 767146768Ssam * what'll be in it after this sequence. 768146768Ssam * 769146768Ssam * We know we can eliminate the accumulator 770146768Ssam * modifications earlier in the sequence since 771146768Ssam * it is defined by the last stmt of this sequence 772146768Ssam * (i.e., the last statement of the sequence loads 773146768Ssam * a value into the accumulator, so we can eliminate 774146768Ssam * earlier operations on the accumulator). 
77517683Spst */ 77617683Spst ild->s.k += s->s.k; 77717683Spst s->s.code = NOP; 77817683Spst add->s.code = NOP; 77917683Spst tax->s.code = NOP; 78017683Spst done = 0; 78117683Spst } 78217683Spst } 78317683Spst /* 784146768Ssam * If the comparison at the end of a block is an equality 785146768Ssam * comparison against a constant, and nobody uses the value 786146768Ssam * we leave in the A register at the end of a block, and 787146768Ssam * the operation preceding the comparison is an arithmetic 788146768Ssam * operation, we can sometime optimize it away. 78917683Spst */ 790146768Ssam if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 791146768Ssam !ATOMELEM(b->out_use, A_ATOM)) { 792146768Ssam /* 793146768Ssam * We can optimize away certain subtractions of the 794146768Ssam * X register. 795146768Ssam */ 796146768Ssam if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 797127664Sbms val = b->val[X_ATOM]; 798127664Sbms if (vmap[val].is_const) { 799127664Sbms /* 800146768Ssam * If we have a subtract to do a comparison, 801146768Ssam * and the X register is a known constant, 802146768Ssam * we can merge this value into the 803146768Ssam * comparison: 804146768Ssam * 805127664Sbms * sub x -> nop 806127664Sbms * jeq #y jeq #(x+y) 807127664Sbms */ 808127664Sbms b->s.k += vmap[val].const_val; 809127664Sbms last->s.code = NOP; 810127664Sbms done = 0; 811127664Sbms } else if (b->s.k == 0) { 812127664Sbms /* 813146768Ssam * If the X register isn't a constant, 814146768Ssam * and the comparison in the test is 815146768Ssam * against 0, we can compare with the 816146768Ssam * X register, instead: 817146768Ssam * 818146768Ssam * sub x -> nop 819146768Ssam * jeq #0 jeq x 820127664Sbms */ 821127664Sbms last->s.code = NOP; 822146768Ssam b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 823127664Sbms done = 0; 824127664Sbms } 825127664Sbms } 826127664Sbms /* 827146768Ssam * Likewise, a constant subtract can be simplified: 828146768Ssam * 829146768Ssam * sub #x -> nop 830146768Ssam * jeq #y -> jeq #(x+y) 
831127664Sbms */ 832146768Ssam else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 83317683Spst last->s.code = NOP; 834127664Sbms b->s.k += last->s.k; 83517683Spst done = 0; 83617683Spst } 837146768Ssam /* 838146768Ssam * And, similarly, a constant AND can be simplified 839146768Ssam * if we're testing against 0, i.e.: 840146768Ssam * 841146768Ssam * and #k nop 842146768Ssam * jeq #0 -> jset #k 843146768Ssam */ 844146768Ssam else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 845146768Ssam b->s.k == 0) { 846146768Ssam b->s.k = last->s.k; 847146768Ssam b->s.code = BPF_JMP|BPF_K|BPF_JSET; 848146768Ssam last->s.code = NOP; 849146768Ssam done = 0; 850146768Ssam opt_not(b); 851146768Ssam } 85217683Spst } 85317683Spst /* 854127664Sbms * jset #0 -> never 855127664Sbms * jset #ffffffff -> always 856127664Sbms */ 857127664Sbms if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 858127664Sbms if (b->s.k == 0) 859127664Sbms JT(b) = JF(b); 860127664Sbms if (b->s.k == 0xffffffff) 861127664Sbms JF(b) = JT(b); 862127664Sbms } 863127664Sbms /* 864190225Srpaulo * If we're comparing against the index register, and the index 865190225Srpaulo * register is a known constant, we can just compare against that 866190225Srpaulo * constant. 867190225Srpaulo */ 868190225Srpaulo val = b->val[X_ATOM]; 869190225Srpaulo if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 870190225Srpaulo bpf_int32 v = vmap[val].const_val; 871190225Srpaulo b->s.code &= ~BPF_X; 872190225Srpaulo b->s.k = v; 873190225Srpaulo } 874190225Srpaulo /* 87517683Spst * If the accumulator is a known constant, we can compute the 87617683Spst * comparison result. 
87717683Spst */ 87817683Spst val = b->val[A_ATOM]; 87917683Spst if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 88017683Spst bpf_int32 v = vmap[val].const_val; 88117683Spst switch (BPF_OP(b->s.code)) { 88217683Spst 88317683Spst case BPF_JEQ: 88417683Spst v = v == b->s.k; 88517683Spst break; 88617683Spst 88717683Spst case BPF_JGT: 88817683Spst v = (unsigned)v > b->s.k; 88917683Spst break; 89017683Spst 89117683Spst case BPF_JGE: 89217683Spst v = (unsigned)v >= b->s.k; 89317683Spst break; 89417683Spst 89517683Spst case BPF_JSET: 89617683Spst v &= b->s.k; 89717683Spst break; 89817683Spst 89917683Spst default: 90017683Spst abort(); 90117683Spst } 90217683Spst if (JF(b) != JT(b)) 90317683Spst done = 0; 90417683Spst if (v) 90517683Spst JF(b) = JT(b); 90617683Spst else 90717683Spst JT(b) = JF(b); 90817683Spst } 90917683Spst} 91017683Spst 91117683Spst/* 91217683Spst * Compute the symbolic value of expression of 's', and update 91317683Spst * anything it defines in the value table 'val'. If 'alter' is true, 91417683Spst * do various optimizations. This code would be cleaner if symbolic 91517683Spst * evaluation and code transformations weren't folded together. 
91617683Spst */ 91717683Spststatic void 918251129Sdelphijopt_stmt(struct stmt *s, int val[], int alter) 91917683Spst{ 92017683Spst int op; 92117683Spst int v; 92217683Spst 92317683Spst switch (s->code) { 92417683Spst 92517683Spst case BPF_LD|BPF_ABS|BPF_W: 92617683Spst case BPF_LD|BPF_ABS|BPF_H: 92717683Spst case BPF_LD|BPF_ABS|BPF_B: 92817683Spst v = F(s->code, s->k, 0L); 92917683Spst vstore(s, &val[A_ATOM], v, alter); 93017683Spst break; 93117683Spst 93217683Spst case BPF_LD|BPF_IND|BPF_W: 93317683Spst case BPF_LD|BPF_IND|BPF_H: 93417683Spst case BPF_LD|BPF_IND|BPF_B: 93517683Spst v = val[X_ATOM]; 93617683Spst if (alter && vmap[v].is_const) { 93717683Spst s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 93817683Spst s->k += vmap[v].const_val; 93917683Spst v = F(s->code, s->k, 0L); 94017683Spst done = 0; 94117683Spst } 94217683Spst else 94317683Spst v = F(s->code, s->k, v); 94417683Spst vstore(s, &val[A_ATOM], v, alter); 94517683Spst break; 94617683Spst 94717683Spst case BPF_LD|BPF_LEN: 94817683Spst v = F(s->code, 0L, 0L); 94917683Spst vstore(s, &val[A_ATOM], v, alter); 95017683Spst break; 95117683Spst 95217683Spst case BPF_LD|BPF_IMM: 95317683Spst v = K(s->k); 95417683Spst vstore(s, &val[A_ATOM], v, alter); 95517683Spst break; 95617683Spst 95717683Spst case BPF_LDX|BPF_IMM: 95817683Spst v = K(s->k); 95917683Spst vstore(s, &val[X_ATOM], v, alter); 96017683Spst break; 96117683Spst 96217683Spst case BPF_LDX|BPF_MSH|BPF_B: 96317683Spst v = F(s->code, s->k, 0L); 96417683Spst vstore(s, &val[X_ATOM], v, alter); 96517683Spst break; 96617683Spst 96717683Spst case BPF_ALU|BPF_NEG: 96817683Spst if (alter && vmap[val[A_ATOM]].is_const) { 96917683Spst s->code = BPF_LD|BPF_IMM; 97017683Spst s->k = -vmap[val[A_ATOM]].const_val; 97117683Spst val[A_ATOM] = K(s->k); 97217683Spst } 97317683Spst else 97417683Spst val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 97517683Spst break; 97617683Spst 97717683Spst case BPF_ALU|BPF_ADD|BPF_K: 97817683Spst case BPF_ALU|BPF_SUB|BPF_K: 97917683Spst case 
BPF_ALU|BPF_MUL|BPF_K: 98017683Spst case BPF_ALU|BPF_DIV|BPF_K: 98117683Spst case BPF_ALU|BPF_AND|BPF_K: 98217683Spst case BPF_ALU|BPF_OR|BPF_K: 98317683Spst case BPF_ALU|BPF_LSH|BPF_K: 98417683Spst case BPF_ALU|BPF_RSH|BPF_K: 98517683Spst op = BPF_OP(s->code); 98617683Spst if (alter) { 98717683Spst if (s->k == 0) { 98898530Sfenner /* don't optimize away "sub #0" 98998530Sfenner * as it may be needed later to 99098530Sfenner * fixup the generated math code */ 99198530Sfenner if (op == BPF_ADD || 99217683Spst op == BPF_LSH || op == BPF_RSH || 99317683Spst op == BPF_OR) { 99417683Spst s->code = NOP; 99517683Spst break; 99617683Spst } 99717683Spst if (op == BPF_MUL || op == BPF_AND) { 99817683Spst s->code = BPF_LD|BPF_IMM; 99917683Spst val[A_ATOM] = K(s->k); 100017683Spst break; 100117683Spst } 100217683Spst } 100317683Spst if (vmap[val[A_ATOM]].is_const) { 100417683Spst fold_op(s, val[A_ATOM], K(s->k)); 100517683Spst val[A_ATOM] = K(s->k); 100617683Spst break; 100717683Spst } 100817683Spst } 100917683Spst val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 101017683Spst break; 101117683Spst 101217683Spst case BPF_ALU|BPF_ADD|BPF_X: 101317683Spst case BPF_ALU|BPF_SUB|BPF_X: 101417683Spst case BPF_ALU|BPF_MUL|BPF_X: 101517683Spst case BPF_ALU|BPF_DIV|BPF_X: 101617683Spst case BPF_ALU|BPF_AND|BPF_X: 101717683Spst case BPF_ALU|BPF_OR|BPF_X: 101817683Spst case BPF_ALU|BPF_LSH|BPF_X: 101917683Spst case BPF_ALU|BPF_RSH|BPF_X: 102017683Spst op = BPF_OP(s->code); 102117683Spst if (alter && vmap[val[X_ATOM]].is_const) { 102217683Spst if (vmap[val[A_ATOM]].is_const) { 102317683Spst fold_op(s, val[A_ATOM], val[X_ATOM]); 102417683Spst val[A_ATOM] = K(s->k); 102517683Spst } 102617683Spst else { 102717683Spst s->code = BPF_ALU|BPF_K|op; 102817683Spst s->k = vmap[val[X_ATOM]].const_val; 102917683Spst done = 0; 103017683Spst val[A_ATOM] = 103117683Spst F(s->code, val[A_ATOM], K(s->k)); 103217683Spst } 103317683Spst break; 103417683Spst } 103517683Spst /* 103617683Spst * Check if we're 
doing something to an accumulator 103717683Spst * that is 0, and simplify. This may not seem like 103817683Spst * much of a simplification but it could open up further 103917683Spst * optimizations. 1040127664Sbms * XXX We could also check for mul by 1, etc. 104117683Spst */ 104217683Spst if (alter && vmap[val[A_ATOM]].is_const 104317683Spst && vmap[val[A_ATOM]].const_val == 0) { 1044127664Sbms if (op == BPF_ADD || op == BPF_OR) { 104517683Spst s->code = BPF_MISC|BPF_TXA; 104617683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 104717683Spst break; 104817683Spst } 104917683Spst else if (op == BPF_MUL || op == BPF_DIV || 1050127664Sbms op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 105117683Spst s->code = BPF_LD|BPF_IMM; 105217683Spst s->k = 0; 105317683Spst vstore(s, &val[A_ATOM], K(s->k), alter); 105417683Spst break; 105517683Spst } 105617683Spst else if (op == BPF_NEG) { 105717683Spst s->code = NOP; 105817683Spst break; 105917683Spst } 106017683Spst } 106117683Spst val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 106217683Spst break; 106317683Spst 106417683Spst case BPF_MISC|BPF_TXA: 106517683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 106617683Spst break; 106717683Spst 106817683Spst case BPF_LD|BPF_MEM: 106917683Spst v = val[s->k]; 107017683Spst if (alter && vmap[v].is_const) { 107117683Spst s->code = BPF_LD|BPF_IMM; 107217683Spst s->k = vmap[v].const_val; 107317683Spst done = 0; 107417683Spst } 107517683Spst vstore(s, &val[A_ATOM], v, alter); 107617683Spst break; 107717683Spst 107817683Spst case BPF_MISC|BPF_TAX: 107917683Spst vstore(s, &val[X_ATOM], val[A_ATOM], alter); 108017683Spst break; 108117683Spst 108217683Spst case BPF_LDX|BPF_MEM: 108317683Spst v = val[s->k]; 108417683Spst if (alter && vmap[v].is_const) { 108517683Spst s->code = BPF_LDX|BPF_IMM; 108617683Spst s->k = vmap[v].const_val; 108717683Spst done = 0; 108817683Spst } 108917683Spst vstore(s, &val[X_ATOM], v, alter); 109017683Spst break; 109117683Spst 109217683Spst case BPF_ST: 
109317683Spst vstore(s, &val[s->k], val[A_ATOM], alter); 109417683Spst break; 109517683Spst 109617683Spst case BPF_STX: 109717683Spst vstore(s, &val[s->k], val[X_ATOM], alter); 109817683Spst break; 109917683Spst } 110017683Spst} 110117683Spst 110217683Spststatic void 1103251129Sdelphijdeadstmt(register struct stmt *s, register struct stmt *last[]) 110417683Spst{ 110517683Spst register int atom; 110617683Spst 110717683Spst atom = atomuse(s); 110817683Spst if (atom >= 0) { 110917683Spst if (atom == AX_ATOM) { 111017683Spst last[X_ATOM] = 0; 111117683Spst last[A_ATOM] = 0; 111217683Spst } 111317683Spst else 111417683Spst last[atom] = 0; 111517683Spst } 111617683Spst atom = atomdef(s); 111717683Spst if (atom >= 0) { 111817683Spst if (last[atom]) { 111917683Spst done = 0; 112017683Spst last[atom]->code = NOP; 112117683Spst } 112217683Spst last[atom] = s; 112317683Spst } 112417683Spst} 112517683Spst 112617683Spststatic void 1127251129Sdelphijopt_deadstores(register struct block *b) 112817683Spst{ 112917683Spst register struct slist *s; 113017683Spst register int atom; 113117683Spst struct stmt *last[N_ATOMS]; 113217683Spst 113317683Spst memset((char *)last, 0, sizeof last); 113417683Spst 113517683Spst for (s = b->stmts; s != 0; s = s->next) 113617683Spst deadstmt(&s->s, last); 113717683Spst deadstmt(&b->s, last); 113817683Spst 113917683Spst for (atom = 0; atom < N_ATOMS; ++atom) 114017683Spst if (last[atom] && !ATOMELEM(b->out_use, atom)) { 114117683Spst last[atom]->code = NOP; 114217683Spst done = 0; 114317683Spst } 114417683Spst} 114517683Spst 114617683Spststatic void 1147251129Sdelphijopt_blk(struct block *b, int do_stmts) 114817683Spst{ 114917683Spst struct slist *s; 115017683Spst struct edge *p; 115117683Spst int i; 1152146768Ssam bpf_int32 aval, xval; 115317683Spst 115456889Sfenner#if 0 115556889Sfenner for (s = b->stmts; s && s->next; s = s->next) 115656889Sfenner if (BPF_CLASS(s->s.code) == BPF_JMP) { 115756889Sfenner do_stmts = 0; 115856889Sfenner break; 
115956889Sfenner } 116056889Sfenner#endif 116156889Sfenner 116217683Spst /* 116317683Spst * Initialize the atom values. 116417683Spst */ 116517683Spst p = b->in_edges; 1166146768Ssam if (p == 0) { 1167146768Ssam /* 1168146768Ssam * We have no predecessors, so everything is undefined 1169146768Ssam * upon entry to this block. 1170146768Ssam */ 117117683Spst memset((char *)b->val, 0, sizeof(b->val)); 1172146768Ssam } else { 1173146768Ssam /* 1174146768Ssam * Inherit values from our predecessors. 1175146768Ssam * 1176146768Ssam * First, get the values from the predecessor along the 1177146768Ssam * first edge leading to this node. 1178146768Ssam */ 117917683Spst memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 1180146768Ssam /* 1181146768Ssam * Now look at all the other nodes leading to this node. 1182146768Ssam * If, for the predecessor along that edge, a register 1183146768Ssam * has a different value from the one we have (i.e., 1184146768Ssam * control paths are merging, and the merging paths 1185146768Ssam * assign different values to that register), give the 1186146768Ssam * register the undefined value of 0. 1187146768Ssam */ 118817683Spst while ((p = p->next) != NULL) { 118917683Spst for (i = 0; i < N_ATOMS; ++i) 119017683Spst if (b->val[i] != p->pred->val[i]) 119117683Spst b->val[i] = 0; 119217683Spst } 119317683Spst } 119417683Spst aval = b->val[A_ATOM]; 1195146768Ssam xval = b->val[X_ATOM]; 119617683Spst for (s = b->stmts; s; s = s->next) 119717683Spst opt_stmt(&s->s, b->val, do_stmts); 119817683Spst 119917683Spst /* 120017683Spst * This is a special case: if we don't use anything from this 1201146768Ssam * block, and we load the accumulator or index register with a 1202146768Ssam * value that is already there, or if this block is a return, 120317683Spst * eliminate all the statements. 1204146768Ssam * 1205146768Ssam * XXX - what if it does a store? 
1206146768Ssam * 1207146768Ssam * XXX - why does it matter whether we use anything from this 1208146768Ssam * block? If the accumulator or index register doesn't change 1209146768Ssam * its value, isn't that OK even if we use that value? 1210146768Ssam * 1211146768Ssam * XXX - if we load the accumulator with a different value, 1212146768Ssam * and the block ends with a conditional branch, we obviously 1213146768Ssam * can't eliminate it, as the branch depends on that value. 1214146768Ssam * For the index register, the conditional branch only depends 1215146768Ssam * on the index register value if the test is against the index 1216146768Ssam * register value rather than a constant; if nothing uses the 1217146768Ssam * value we put into the index register, and we're not testing 1218146768Ssam * against the index register's value, and there aren't any 1219146768Ssam * other problems that would keep us from eliminating this 1220146768Ssam * block, can we eliminate it? 122117683Spst */ 1222127664Sbms if (do_stmts && 1223146768Ssam ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval && 1224146768Ssam xval != 0 && b->val[X_ATOM] == xval) || 122517683Spst BPF_CLASS(b->s.code) == BPF_RET)) { 122617683Spst if (b->stmts != 0) { 122717683Spst b->stmts = 0; 122817683Spst done = 0; 122917683Spst } 123017683Spst } else { 123117683Spst opt_peep(b); 123217683Spst opt_deadstores(b); 123317683Spst } 123417683Spst /* 123517683Spst * Set up values for branch optimizer. 123617683Spst */ 123717683Spst if (BPF_SRC(b->s.code) == BPF_K) 123817683Spst b->oval = K(b->s.k); 123917683Spst else 124017683Spst b->oval = b->val[X_ATOM]; 124117683Spst b->et.code = b->s.code; 124217683Spst b->ef.code = -b->s.code; 124317683Spst} 124417683Spst 124517683Spst/* 124617683Spst * Return true if any register that is used on exit from 'succ', has 124717683Spst * an exit value that is different from the corresponding exit value 124817683Spst * from 'b'. 
124917683Spst */ 125017683Spststatic int 1251251129Sdelphijuse_conflict(struct block *b, struct block *succ) 125217683Spst{ 125317683Spst int atom; 125417683Spst atomset use = succ->out_use; 125517683Spst 125617683Spst if (use == 0) 125717683Spst return 0; 125817683Spst 125917683Spst for (atom = 0; atom < N_ATOMS; ++atom) 126017683Spst if (ATOMELEM(use, atom)) 126117683Spst if (b->val[atom] != succ->val[atom]) 126217683Spst return 1; 126317683Spst return 0; 126417683Spst} 126517683Spst 126617683Spststatic struct block * 1267251129Sdelphijfold_edge(struct block *child, struct edge *ep) 126817683Spst{ 126917683Spst int sense; 127017683Spst int aval0, aval1, oval0, oval1; 127117683Spst int code = ep->code; 127217683Spst 127317683Spst if (code < 0) { 127417683Spst code = -code; 127517683Spst sense = 0; 127617683Spst } else 127717683Spst sense = 1; 127817683Spst 127917683Spst if (child->s.code != code) 128017683Spst return 0; 128117683Spst 128217683Spst aval0 = child->val[A_ATOM]; 128317683Spst oval0 = child->oval; 128417683Spst aval1 = ep->pred->val[A_ATOM]; 128517683Spst oval1 = ep->pred->oval; 128617683Spst 128717683Spst if (aval0 != aval1) 128817683Spst return 0; 128917683Spst 129017683Spst if (oval0 == oval1) 129117683Spst /* 1292146768Ssam * The operands of the branch instructions are 1293146768Ssam * identical, so the result is true if a true 1294146768Ssam * branch was taken to get here, otherwise false. 129517683Spst */ 129617683Spst return sense ? JT(child) : JF(child); 129717683Spst 129817683Spst if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 129917683Spst /* 130017683Spst * At this point, we only know the comparison if we 130117683Spst * came down the true branch, and it was an equality 1302146768Ssam * comparison with a constant. 1303146768Ssam * 1304146768Ssam * I.e., if we came down the true branch, and the branch 1305146768Ssam * was an equality comparison with a constant, we know the 1306146768Ssam * accumulator contains that constant. 
If we came down 1307146768Ssam * the false branch, or the comparison wasn't with a 1308146768Ssam * constant, we don't know what was in the accumulator. 1309146768Ssam * 1310146768Ssam * We rely on the fact that distinct constants have distinct 1311146768Ssam * value numbers. 131217683Spst */ 131317683Spst return JF(child); 131417683Spst 131517683Spst return 0; 131617683Spst} 131717683Spst 131817683Spststatic void 1319251129Sdelphijopt_j(struct edge *ep) 132017683Spst{ 132117683Spst register int i, k; 132217683Spst register struct block *target; 132317683Spst 132417683Spst if (JT(ep->succ) == 0) 132517683Spst return; 132617683Spst 132717683Spst if (JT(ep->succ) == JF(ep->succ)) { 132817683Spst /* 132917683Spst * Common branch targets can be eliminated, provided 133017683Spst * there is no data dependency. 133117683Spst */ 133217683Spst if (!use_conflict(ep->pred, ep->succ->et.succ)) { 133317683Spst done = 0; 133417683Spst ep->succ = JT(ep->succ); 133517683Spst } 133617683Spst } 133717683Spst /* 133817683Spst * For each edge dominator that matches the successor of this 133917683Spst * edge, promote the edge successor to the its grandchild. 134017683Spst * 134117683Spst * XXX We violate the set abstraction here in favor a reasonably 134217683Spst * efficient loop. 134317683Spst */ 134417683Spst top: 134517683Spst for (i = 0; i < edgewords; ++i) { 134617683Spst register bpf_u_int32 x = ep->edom[i]; 134717683Spst 134817683Spst while (x != 0) { 134917683Spst k = ffs(x) - 1; 135017683Spst x &=~ (1 << k); 135117683Spst k += i * BITS_PER_WORD; 135217683Spst 135317683Spst target = fold_edge(ep->succ, edges[k]); 135417683Spst /* 135517683Spst * Check that there is no data dependency between 135617683Spst * nodes that will be violated if we move the edge. 
135717683Spst */ 135817683Spst if (target != 0 && !use_conflict(ep->pred, target)) { 135917683Spst done = 0; 136017683Spst ep->succ = target; 136117683Spst if (JT(target) != 0) 136217683Spst /* 136317683Spst * Start over unless we hit a leaf. 136417683Spst */ 136517683Spst goto top; 136617683Spst return; 136717683Spst } 136817683Spst } 136917683Spst } 137017683Spst} 137117683Spst 137217683Spst 137317683Spststatic void 1374251129Sdelphijor_pullup(struct block *b) 137517683Spst{ 137617683Spst int val, at_top; 137717683Spst struct block *pull; 137817683Spst struct block **diffp, **samep; 137917683Spst struct edge *ep; 138017683Spst 138117683Spst ep = b->in_edges; 138217683Spst if (ep == 0) 138317683Spst return; 138417683Spst 138517683Spst /* 138617683Spst * Make sure each predecessor loads the same value. 138717683Spst * XXX why? 138817683Spst */ 138917683Spst val = ep->pred->val[A_ATOM]; 139017683Spst for (ep = ep->next; ep != 0; ep = ep->next) 139117683Spst if (val != ep->pred->val[A_ATOM]) 139217683Spst return; 139317683Spst 139417683Spst if (JT(b->in_edges->pred) == b) 139517683Spst diffp = &JT(b->in_edges->pred); 139617683Spst else 139717683Spst diffp = &JF(b->in_edges->pred); 139817683Spst 139917683Spst at_top = 1; 140017683Spst while (1) { 140117683Spst if (*diffp == 0) 140217683Spst return; 140317683Spst 140417683Spst if (JT(*diffp) != JT(b)) 140517683Spst return; 140617683Spst 140717683Spst if (!SET_MEMBER((*diffp)->dom, b->id)) 140817683Spst return; 140917683Spst 141017683Spst if ((*diffp)->val[A_ATOM] != val) 141117683Spst break; 141217683Spst 141317683Spst diffp = &JF(*diffp); 141417683Spst at_top = 0; 141517683Spst } 141617683Spst samep = &JF(*diffp); 141717683Spst while (1) { 141817683Spst if (*samep == 0) 141917683Spst return; 142017683Spst 142117683Spst if (JT(*samep) != JT(b)) 142217683Spst return; 142317683Spst 142417683Spst if (!SET_MEMBER((*samep)->dom, b->id)) 142517683Spst return; 142617683Spst 142717683Spst if ((*samep)->val[A_ATOM] == val) 
142817683Spst break; 142917683Spst 143017683Spst /* XXX Need to check that there are no data dependencies 143117683Spst between dp0 and dp1. Currently, the code generator 143217683Spst will not produce such dependencies. */ 143317683Spst samep = &JF(*samep); 143417683Spst } 143517683Spst#ifdef notdef 143617683Spst /* XXX This doesn't cover everything. */ 143717683Spst for (i = 0; i < N_ATOMS; ++i) 143817683Spst if ((*samep)->val[i] != pred->val[i]) 143917683Spst return; 144017683Spst#endif 144117683Spst /* Pull up the node. */ 144217683Spst pull = *samep; 144317683Spst *samep = JF(pull); 144417683Spst JF(pull) = *diffp; 144517683Spst 144617683Spst /* 144717683Spst * At the top of the chain, each predecessor needs to point at the 144817683Spst * pulled up node. Inside the chain, there is only one predecessor 144917683Spst * to worry about. 145017683Spst */ 145117683Spst if (at_top) { 145217683Spst for (ep = b->in_edges; ep != 0; ep = ep->next) { 145317683Spst if (JT(ep->pred) == b) 145417683Spst JT(ep->pred) = pull; 145517683Spst else 145617683Spst JF(ep->pred) = pull; 145717683Spst } 145817683Spst } 145917683Spst else 146017683Spst *diffp = pull; 146117683Spst 146217683Spst done = 0; 146317683Spst} 146417683Spst 146517683Spststatic void 1466251129Sdelphijand_pullup(struct block *b) 146717683Spst{ 146817683Spst int val, at_top; 146917683Spst struct block *pull; 147017683Spst struct block **diffp, **samep; 147117683Spst struct edge *ep; 147217683Spst 147317683Spst ep = b->in_edges; 147417683Spst if (ep == 0) 147517683Spst return; 147617683Spst 147717683Spst /* 147817683Spst * Make sure each predecessor loads the same value. 
147917683Spst */ 148017683Spst val = ep->pred->val[A_ATOM]; 148117683Spst for (ep = ep->next; ep != 0; ep = ep->next) 148217683Spst if (val != ep->pred->val[A_ATOM]) 148317683Spst return; 148417683Spst 148517683Spst if (JT(b->in_edges->pred) == b) 148617683Spst diffp = &JT(b->in_edges->pred); 148717683Spst else 148817683Spst diffp = &JF(b->in_edges->pred); 148917683Spst 149017683Spst at_top = 1; 149117683Spst while (1) { 149217683Spst if (*diffp == 0) 149317683Spst return; 149417683Spst 149517683Spst if (JF(*diffp) != JF(b)) 149617683Spst return; 149717683Spst 149817683Spst if (!SET_MEMBER((*diffp)->dom, b->id)) 149917683Spst return; 150017683Spst 150117683Spst if ((*diffp)->val[A_ATOM] != val) 150217683Spst break; 150317683Spst 150417683Spst diffp = &JT(*diffp); 150517683Spst at_top = 0; 150617683Spst } 150717683Spst samep = &JT(*diffp); 150817683Spst while (1) { 150917683Spst if (*samep == 0) 151017683Spst return; 151117683Spst 151217683Spst if (JF(*samep) != JF(b)) 151317683Spst return; 151417683Spst 151517683Spst if (!SET_MEMBER((*samep)->dom, b->id)) 151617683Spst return; 151717683Spst 151817683Spst if ((*samep)->val[A_ATOM] == val) 151917683Spst break; 152017683Spst 152117683Spst /* XXX Need to check that there are no data dependencies 152217683Spst between diffp and samep. Currently, the code generator 152317683Spst will not produce such dependencies. */ 152417683Spst samep = &JT(*samep); 152517683Spst } 152617683Spst#ifdef notdef 152717683Spst /* XXX This doesn't cover everything. */ 152817683Spst for (i = 0; i < N_ATOMS; ++i) 152917683Spst if ((*samep)->val[i] != pred->val[i]) 153017683Spst return; 153117683Spst#endif 153217683Spst /* Pull up the node. */ 153317683Spst pull = *samep; 153417683Spst *samep = JT(pull); 153517683Spst JT(pull) = *diffp; 153617683Spst 153717683Spst /* 153817683Spst * At the top of the chain, each predecessor needs to point at the 153917683Spst * pulled up node. 
Inside the chain, there is only one predecessor 154017683Spst * to worry about. 154117683Spst */ 154217683Spst if (at_top) { 154317683Spst for (ep = b->in_edges; ep != 0; ep = ep->next) { 154417683Spst if (JT(ep->pred) == b) 154517683Spst JT(ep->pred) = pull; 154617683Spst else 154717683Spst JF(ep->pred) = pull; 154817683Spst } 154917683Spst } 155017683Spst else 155117683Spst *diffp = pull; 155217683Spst 155317683Spst done = 0; 155417683Spst} 155517683Spst 155617683Spststatic void 1557251129Sdelphijopt_blks(struct block *root, int do_stmts) 155817683Spst{ 155917683Spst int i, maxlevel; 156017683Spst struct block *p; 156117683Spst 156217683Spst init_val(); 156317683Spst maxlevel = root->level; 156475107Sfenner 156575107Sfenner find_inedges(root); 156617683Spst for (i = maxlevel; i >= 0; --i) 156717683Spst for (p = levels[i]; p; p = p->link) 156817683Spst opt_blk(p, do_stmts); 156917683Spst 157017683Spst if (do_stmts) 157117683Spst /* 157217683Spst * No point trying to move branches; it can't possibly 157317683Spst * make a difference at this point. 
157417683Spst */ 157517683Spst return; 157617683Spst 157717683Spst for (i = 1; i <= maxlevel; ++i) { 157817683Spst for (p = levels[i]; p; p = p->link) { 157917683Spst opt_j(&p->et); 158017683Spst opt_j(&p->ef); 158117683Spst } 158217683Spst } 158375107Sfenner 158475107Sfenner find_inedges(root); 158517683Spst for (i = 1; i <= maxlevel; ++i) { 158617683Spst for (p = levels[i]; p; p = p->link) { 158717683Spst or_pullup(p); 158817683Spst and_pullup(p); 158917683Spst } 159017683Spst } 159117683Spst} 159217683Spst 159317683Spststatic inline void 1594251129Sdelphijlink_inedge(struct edge *parent, struct block *child) 159517683Spst{ 159617683Spst parent->next = child->in_edges; 159717683Spst child->in_edges = parent; 159817683Spst} 159917683Spst 160017683Spststatic void 1601251129Sdelphijfind_inedges(struct block *root) 160217683Spst{ 160317683Spst int i; 160417683Spst struct block *b; 160517683Spst 160617683Spst for (i = 0; i < n_blocks; ++i) 160717683Spst blocks[i]->in_edges = 0; 160817683Spst 160917683Spst /* 161017683Spst * Traverse the graph, adding each edge to the predecessor 161117683Spst * list of its successors. Skip the leaves (i.e. level 0). 
161217683Spst */ 161317683Spst for (i = root->level; i > 0; --i) { 161417683Spst for (b = levels[i]; b != 0; b = b->link) { 161517683Spst link_inedge(&b->et, JT(b)); 161617683Spst link_inedge(&b->ef, JF(b)); 161717683Spst } 161817683Spst } 161917683Spst} 162017683Spst 162117683Spststatic void 1622251129Sdelphijopt_root(struct block **b) 162317683Spst{ 162417683Spst struct slist *tmp, *s; 162517683Spst 162617683Spst s = (*b)->stmts; 162717683Spst (*b)->stmts = 0; 162817683Spst while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 162917683Spst *b = JT(*b); 163017683Spst 163117683Spst tmp = (*b)->stmts; 163217683Spst if (tmp != 0) 163317683Spst sappend(s, tmp); 163417683Spst (*b)->stmts = s; 163517683Spst 163617683Spst /* 163717683Spst * If the root node is a return, then there is no 163817683Spst * point executing any statements (since the bpf machine 163917683Spst * has no side effects). 164017683Spst */ 164117683Spst if (BPF_CLASS((*b)->s.code) == BPF_RET) 164217683Spst (*b)->stmts = 0; 164317683Spst} 164417683Spst 164517683Spststatic void 1646251129Sdelphijopt_loop(struct block *root, int do_stmts) 164717683Spst{ 164817683Spst 164917683Spst#ifdef BDEBUG 165098530Sfenner if (dflag > 1) { 165198530Sfenner printf("opt_loop(root, %d) begin\n", do_stmts); 165217683Spst opt_dump(root); 165398530Sfenner } 165417683Spst#endif 165517683Spst do { 165617683Spst done = 1; 165717683Spst find_levels(root); 165817683Spst find_dom(root); 165917683Spst find_closure(root); 166017683Spst find_ud(root); 166117683Spst find_edom(root); 166217683Spst opt_blks(root, do_stmts); 166317683Spst#ifdef BDEBUG 166498530Sfenner if (dflag > 1) { 166598530Sfenner printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done); 166617683Spst opt_dump(root); 166798530Sfenner } 166817683Spst#endif 166917683Spst } while (!done); 167017683Spst} 167117683Spst 167217683Spst/* 167317683Spst * Optimize the filter code in its dag representation. 
167417683Spst */ 167517683Spstvoid 1676251129Sdelphijbpf_optimize(struct block **rootp) 167717683Spst{ 167817683Spst struct block *root; 167917683Spst 168017683Spst root = *rootp; 168117683Spst 168217683Spst opt_init(root); 168317683Spst opt_loop(root, 0); 168417683Spst opt_loop(root, 1); 168517683Spst intern_blocks(root); 168698530Sfenner#ifdef BDEBUG 168798530Sfenner if (dflag > 1) { 168898530Sfenner printf("after intern_blocks()\n"); 168998530Sfenner opt_dump(root); 169098530Sfenner } 169198530Sfenner#endif 169217683Spst opt_root(rootp); 169398530Sfenner#ifdef BDEBUG 169498530Sfenner if (dflag > 1) { 169598530Sfenner printf("after opt_root()\n"); 169698530Sfenner opt_dump(root); 169798530Sfenner } 169898530Sfenner#endif 169917683Spst opt_cleanup(); 170017683Spst} 170117683Spst 170217683Spststatic void 1703251129Sdelphijmake_marks(struct block *p) 170417683Spst{ 170517683Spst if (!isMarked(p)) { 170617683Spst Mark(p); 170717683Spst if (BPF_CLASS(p->s.code) != BPF_RET) { 170817683Spst make_marks(JT(p)); 170917683Spst make_marks(JF(p)); 171017683Spst } 171117683Spst } 171217683Spst} 171317683Spst 171417683Spst/* 171517683Spst * Mark code array such that isMarked(i) is true 171617683Spst * only for nodes that are alive. 171717683Spst */ 171817683Spststatic void 1719251129Sdelphijmark_code(struct block *p) 172017683Spst{ 172117683Spst cur_mark += 1; 172217683Spst make_marks(p); 172317683Spst} 172417683Spst 172517683Spst/* 172617683Spst * True iff the two stmt lists load the same value from the packet into 172717683Spst * the accumulator. 
172817683Spst */ 172917683Spststatic int 1730251129Sdelphijeq_slist(struct slist *x, struct slist *y) 173117683Spst{ 173217683Spst while (1) { 173317683Spst while (x && x->s.code == NOP) 173417683Spst x = x->next; 173517683Spst while (y && y->s.code == NOP) 173617683Spst y = y->next; 173717683Spst if (x == 0) 173817683Spst return y == 0; 173917683Spst if (y == 0) 174017683Spst return x == 0; 174117683Spst if (x->s.code != y->s.code || x->s.k != y->s.k) 174217683Spst return 0; 174317683Spst x = x->next; 174417683Spst y = y->next; 174517683Spst } 174617683Spst} 174717683Spst 174817683Spststatic inline int 1749251129Sdelphijeq_blk(struct block *b0, struct block *b1) 175017683Spst{ 175117683Spst if (b0->s.code == b1->s.code && 175217683Spst b0->s.k == b1->s.k && 175317683Spst b0->et.succ == b1->et.succ && 175417683Spst b0->ef.succ == b1->ef.succ) 175517683Spst return eq_slist(b0->stmts, b1->stmts); 175617683Spst return 0; 175717683Spst} 175817683Spst 175917683Spststatic void 1760251129Sdelphijintern_blocks(struct block *root) 176117683Spst{ 176217683Spst struct block *p; 176317683Spst int i, j; 1764172677Smlaier int done1; /* don't shadow global */ 176517683Spst top: 1766172677Smlaier done1 = 1; 176717683Spst for (i = 0; i < n_blocks; ++i) 176817683Spst blocks[i]->link = 0; 176917683Spst 177017683Spst mark_code(root); 177117683Spst 177217683Spst for (i = n_blocks - 1; --i >= 0; ) { 177317683Spst if (!isMarked(blocks[i])) 177417683Spst continue; 177517683Spst for (j = i + 1; j < n_blocks; ++j) { 177617683Spst if (!isMarked(blocks[j])) 177717683Spst continue; 177817683Spst if (eq_blk(blocks[i], blocks[j])) { 177917683Spst blocks[i]->link = blocks[j]->link ? 
178017683Spst blocks[j]->link : blocks[j]; 178117683Spst break; 178217683Spst } 178317683Spst } 178417683Spst } 178517683Spst for (i = 0; i < n_blocks; ++i) { 178617683Spst p = blocks[i]; 178717683Spst if (JT(p) == 0) 178817683Spst continue; 178917683Spst if (JT(p)->link) { 1790172677Smlaier done1 = 0; 179117683Spst JT(p) = JT(p)->link; 179217683Spst } 179317683Spst if (JF(p)->link) { 1794172677Smlaier done1 = 0; 179517683Spst JF(p) = JF(p)->link; 179617683Spst } 179717683Spst } 1798172677Smlaier if (!done1) 179917683Spst goto top; 180017683Spst} 180117683Spst 180217683Spststatic void 1803251129Sdelphijopt_cleanup(void) 180417683Spst{ 180517683Spst free((void *)vnode_base); 180617683Spst free((void *)vmap); 180717683Spst free((void *)edges); 180817683Spst free((void *)space); 180917683Spst free((void *)levels); 181017683Spst free((void *)blocks); 181117683Spst} 181217683Spst 181317683Spst/* 181417683Spst * Return the number of stmts in 's'. 181517683Spst */ 1816241231Sdelphijstatic u_int 1817251129Sdelphijslength(struct slist *s) 181817683Spst{ 1819241231Sdelphij u_int n = 0; 182017683Spst 182117683Spst for (; s; s = s->next) 182217683Spst if (s->s.code != NOP) 182317683Spst ++n; 182417683Spst return n; 182517683Spst} 182617683Spst 182717683Spst/* 182817683Spst * Return the number of nodes reachable by 'p'. 182917683Spst * All nodes should be initially unmarked. 
183017683Spst */ 183117683Spststatic int 1832251129Sdelphijcount_blocks(struct block *p) 183317683Spst{ 183417683Spst if (p == 0 || isMarked(p)) 183517683Spst return 0; 183617683Spst Mark(p); 183717683Spst return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 183817683Spst} 183917683Spst 184017683Spst/* 184117683Spst * Do a depth first search on the flow graph, numbering the 184217683Spst * the basic blocks, and entering them into the 'blocks' array.` 184317683Spst */ 184417683Spststatic void 1845251129Sdelphijnumber_blks_r(struct block *p) 184617683Spst{ 184717683Spst int n; 184817683Spst 184917683Spst if (p == 0 || isMarked(p)) 185017683Spst return; 185117683Spst 185217683Spst Mark(p); 185317683Spst n = n_blocks++; 185417683Spst p->id = n; 185517683Spst blocks[n] = p; 185617683Spst 185717683Spst number_blks_r(JT(p)); 185817683Spst number_blks_r(JF(p)); 185917683Spst} 186017683Spst 186117683Spst/* 186217683Spst * Return the number of stmts in the flowgraph reachable by 'p'. 186317683Spst * The nodes should be unmarked before calling. 186475107Sfenner * 186575107Sfenner * Note that "stmts" means "instructions", and that this includes 186675107Sfenner * 186775107Sfenner * side-effect statements in 'p' (slength(p->stmts)); 186875107Sfenner * 186975107Sfenner * statements in the true branch from 'p' (count_stmts(JT(p))); 187075107Sfenner * 187175107Sfenner * statements in the false branch from 'p' (count_stmts(JF(p))); 187275107Sfenner * 187375107Sfenner * the conditional jump itself (1); 187475107Sfenner * 187575107Sfenner * an extra long jump if the true branch requires it (p->longjt); 187675107Sfenner * 187775107Sfenner * an extra long jump if the false branch requires it (p->longjf). 
 */
static u_int
count_stmts(struct block *p)
{
	u_int n;

	if (p == 0 || isMarked(p))
		return 0;	/* absent block, or already counted on this pass */
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	/*
	 * Both successor subtrees, plus this block's statements, plus
	 * its branch instruction; longjt/longjf count the extra
	 * unconditional jumps convert_code_r() inserts when a branch
	 * offset is too large for the 8-bit jt/jf fields.
	 */
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(struct block *root)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)calloc(n, sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);
	/* after number_blks_r(), n_blocks == n and blocks[] is filled in */

	/* Every block has two out-edges: the true and false branches. */
	n_edges = 2 * n_blocks;
	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

	/* Bit-set sizes in bpf_u_int32 words, rounded up. */
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/*
	 * XXX
	 * One arena for all bit sets: a dom set and a closure set per
	 * block (nodewords each), plus an edom set per edge (edgewords
	 * each).  The pointer p below carves it up in that order.
	 */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		/* true edges get ids 0..n_blocks-1, false edges the rest */
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code,
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
/* Maps instruction index -> (block id + 1) for debug dumps; 0 = no block. */
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	/* Emit both successors first; blocks are laid out back-to-front. */
	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	/* Carve this block's slot off the tail of the output array. */
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
	/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	/* NOTE(review): off is u_int, slen is int; assumes slength() >= 0. */
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;	/* deleted instruction; emit nothing */
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		/*
		 * Find the jt/jf target statements in offset[] and turn
		 * them into relative BPF jump displacements.  Each target
		 * must match exactly one statement in this block.
		 */
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	/* Now the block's own branch instruction. */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
				p->longjt++;
				return(0);
			}
			/* branch if T to following jump */
			dst->jt = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
				p->longjf++;
				return(0);
			}
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			dst->jf = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
2156172677Smlaier * 2157172677Smlaier * If it appears that icode_to_fcode() is leaking, the problem is that 2158172677Smlaier * the program using pcap_compile() is failing to free the memory in 2159172677Smlaier * the BPF program when it's done - the leak is in the program, not in 2160172677Smlaier * the routine that happens to be allocating the memory. (By analogy, if 2161172677Smlaier * a program calls fopen() without ever calling fclose() on the FILE *, 2162172677Smlaier * it will leak the FILE structure; the leak is not in fopen(), it's in 2163172677Smlaier * the program.) Change the program to use pcap_freecode() when it's 2164172677Smlaier * done with the filter program. See the pcap man page. 216517683Spst */ 216617683Spststruct bpf_insn * 2167251129Sdelphijicode_to_fcode(struct block *root, u_int *lenp) 216817683Spst{ 2169241231Sdelphij u_int n; 217017683Spst struct bpf_insn *fp; 217117683Spst 217217683Spst /* 217398530Sfenner * Loop doing convert_code_r() until no branches remain 217417683Spst * with too-large offsets. 217517683Spst */ 217617683Spst while (1) { 217717683Spst unMarkAll(); 217817683Spst n = *lenp = count_stmts(root); 2179127664Sbms 218017683Spst fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 2181127664Sbms if (fp == NULL) 2182127664Sbms bpf_error("malloc"); 218317683Spst memset((char *)fp, 0, sizeof(*fp) * n); 218417683Spst fstart = fp; 218517683Spst ftail = fp + n; 2186127664Sbms 218717683Spst unMarkAll(); 218817683Spst if (convert_code_r(root)) 218917683Spst break; 219017683Spst free(fp); 219117683Spst } 219217683Spst 219317683Spst return fp; 219417683Spst} 219517683Spst 219675107Sfenner/* 219775107Sfenner * Make a copy of a BPF program and put it in the "fcode" member of 219875107Sfenner * a "pcap_t". 219975107Sfenner * 220075107Sfenner * If we fail to allocate memory for the copy, fill in the "errbuf" 220175107Sfenner * member of the "pcap_t" with an error message, and return -1; 220275107Sfenner * otherwise, return 0. 
220375107Sfenner */ 220475107Sfennerint 220575107Sfennerinstall_bpf_program(pcap_t *p, struct bpf_program *fp) 220675107Sfenner{ 220775107Sfenner size_t prog_size; 220875107Sfenner 220975107Sfenner /* 2210190225Srpaulo * Validate the program. 2211190225Srpaulo */ 2212190225Srpaulo if (!bpf_validate(fp->bf_insns, fp->bf_len)) { 2213190225Srpaulo snprintf(p->errbuf, sizeof(p->errbuf), 2214190225Srpaulo "BPF program is not valid"); 2215190225Srpaulo return (-1); 2216190225Srpaulo } 2217190225Srpaulo 2218190225Srpaulo /* 221975107Sfenner * Free up any already installed program. 222075107Sfenner */ 222175107Sfenner pcap_freecode(&p->fcode); 222275107Sfenner 222375107Sfenner prog_size = sizeof(*fp->bf_insns) * fp->bf_len; 222475107Sfenner p->fcode.bf_len = fp->bf_len; 222575107Sfenner p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); 222675107Sfenner if (p->fcode.bf_insns == NULL) { 222775107Sfenner snprintf(p->errbuf, sizeof(p->errbuf), 222875107Sfenner "malloc: %s", pcap_strerror(errno)); 222975107Sfenner return (-1); 223075107Sfenner } 223175107Sfenner memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); 223275107Sfenner return (0); 223375107Sfenner} 223475107Sfenner 223517683Spst#ifdef BDEBUG 223617683Spststatic void 2237251129Sdelphijopt_dump(struct block *root) 223817683Spst{ 223917683Spst struct bpf_program f; 224017683Spst 224117683Spst memset(bids, 0, sizeof bids); 224217683Spst f.bf_insns = icode_to_fcode(root, &f.bf_len); 224317683Spst bpf_dump(&f, 1); 224417683Spst putchar('\n'); 224517683Spst free((char *)f.bf_insns); 224617683Spst} 224717683Spst#endif 2248