/******************************************************************************** * * File: optimizer.gen * Title: The optimizer for ODMG'93 OQL. * Programmer: Leonidas Fegaras, UTA * Date: 4/9/97 * *********************************************************************************/ #include #include "evaluation.h" #include "typecheck.h" /* negated_comparisons binds boolean comparisons to their negated forms */ string ngv[][2] = {{"eq","neq"},{"neq","eq"},{"lt","geq"}, {"leq","gt"},{"gt","leq"},{"geq","lt"}}; binding* make_negated ( string v[][2] ) { binding* res = new binding; for(short i = sizeof(v); i>=0; i--) res = res->extend(&v[i][0],&v[i][1]); return res; }; static binding* negated_comparisons = make_negated(ngv); Names string_vector ( string v[] ) { Names res = new list; for(short i = sizeof(v)-1; i>=0; i--) res = res->cons(&v[i]); return res; }; static string cmp[] = { "eq", "neq", "gt", "lt", "geq", "leq" }; Names comparisons = string_vector(cmp); /* all_plans holds the names of all physical plans */ list* all_plans = Nil->cons(#)->cons(#)->cons(#)->cons(#) ->cons(#)->cons(#)->cons(#)->cons(#) ->cons(#)->cons(#)->cons(#)->cons(#); /* does v occur in x? */ bool occurs ( String v, Expr x ) { #case x | `f(...r) => { if (occurs(v,f)) return true; for(list* s=r; s->consp(); s=s->tl) if (occurs(v,s->hd)) return true; }; | _ => if (x->variablep()) return v->eq(x->name()); else return false; #end; return false; }; /* retrieves the range variables from the iterators of the list of a comprehension quantifiers tl */ list* tables (list* tl) { list* res = Nil; for(list* r = tl; r->consp(); r=r->tl) #case r->hd | iterate(`table,`e) => res = res->cons(table); #end; return res; }; /* for each algebraic expression e, range_variables(e) collects * all range variables (along with their types) that appear in the leaves of e */ Schema range_variables (Expr e) { Schema res = new binding; #case e | unnest(_,`tb,`v,...r) => return range_variables(tb)->extend(v->name(),type_of(tb)->arguments()->hd); | nest(_,`tb,`v,...r) => return range_variables(tb)->extend(v->name(),type_of(tb)->arguments()->hd); | get(_,`tb,`v,_) => if (tb->variablep()) return res->extend(v->name(),type_of(tb)->arguments()->hd); | `f(...r) => for(list* s = r; s->consp(); s=s->tl) res = res->extend(range_variables(s->hd)); #end; return res; }; /* for each plan or algebraic expression e, range_vars(e) collects * all range variables that appear in the leaves of e */ list* range_vars ( Expr e ) { list* res = Nil; #case e | unnest(_,`tb,`v,...r) => return res->append(range_vars(tb))->cons(v); | nest(_,`tb,`v,...r) => return res->append(range_vars(tb))->cons(v); | get(_,`tb,`v,_) => if (tb->variablep()) return res->append(range_vars(tb))->cons(v); | UNNEST(`tb,`v,...r) => return res->append(range_vars(tb))->cons(v); | NEST(_,`tb,`v,...r) => return res->append(range_vars(tb))->cons(v); | GROUP_BY(_,`tb,`v,...r) => return res->append(range_vars(tb))->cons(v); | TABLE_SCAN(`tb,`v,_) => if (tb->variablep()) return res->append(range_vars(tb))->cons(v); | INDEX_SCAN(`tb,`v,_,_) => if (tb->variablep()) return res->append(range_vars(tb))->cons(v); | `f(...r) => for(list* s = r; s->consp(); s=s->tl) res = res->append(range_vars(s->hd)); #end; return res; }; /* for each algebraic expression e, groupby_vars(e) collects all range variables that appear * in an unnest or get; these variables are used as groupby_vars in a nest operations */ list* groupby_vars ( Expr e ) { list* res = Nil; #case e | unnest(_,`tb,`v,...r) => return res->append(groupby_vars(tb))->cons(v); | nest(_,_,`v,_,vars(...r),_,_) => return r; | Nest(_,_,`v,_,vars(...r),_,_) => return r; | get(_,`tb,`v,_) => if (tb->variablep()) return res->append(groupby_vars(tb))->cons(v); | `f(...r) => for(list* s = r; s->consp(); s=s->tl) res = res->append(groupby_vars(s->hd)); #end; return res; }; /* for each plan e, nesting_vars(e) collects all range variables that appear in a NEST plan */ list* all_nesting_vars ( Expr e ) { list* res = Nil; #case e | NEST(_,`plan,`v,_,_,vars(...r),_) => return r->append(all_nesting_vars(plan))->cons(v); | GROUP_BY(_,`plan,`v,_,_,vars(...r),_) => return r->append(all_nesting_vars(plan))->cons(v); | `f(...r) => for(list* s = r; s->consp(); s=s->tl) res = res->append(all_nesting_vars(s->hd)); #end; return res; }; /* true if x is a plan */ bool is_plan ( Expr x ) { #case x | `f(...r) => return member(f,all_plans); | _ => return false; #end; }; /* returns true if pred refers to a range variable in tables */ bool refers_to ( Expr pred, list* tables ) { #case pred | nested_query(`x,`e) => return false; | project(`x,`a) => return refers_to(x,tables); | `f(...r) => for(; r->consp(); r=r->tl) if (refers_to(r->hd,tables)) return true; | `v => if (member(v,tables)) return true; #end; return false; }; /* returns the head of a path expression */ Expr path_root ( Expr path ) { #case path | project(`x,`a) => return path_root(x); | `e => if (e->variablep()) return e; #end; return #; }; list* set_union ( list* x, list* y ) { list* res = x; for(list* r = y; r->consp(); r=r->tl) if (!(member(r->hd,res))) res = res->cons(r->hd); return res; }; list* all_materialized_vars ( Expr e ) { #case e | eq(OID(project(_,_)),OID(`v)) => return Cons(v,Nil); | `f(...r) => { list* res = Nil; for(list* s = r; s->consp(); s=s->tl) res = res->append(all_materialized_vars(s->hd)); return res; }; | _ => return Nil; #end; }; /* returns the attributes of the range variables in tables * that participate in the predicate pred */ list* get_order ( Expr pred, list* tables ) { list* res = Nil; #case pred | OID(`x) => if (refers_to(x,tables)) res = res->cons(pred); else return get_order(x,tables); | project(`x,`a) => if (refers_to(pred,tables)) res = res->cons(pred); | `f(...r) => for(; r->consp(); r=r->tl) res = set_union(res,get_order(r->hd,tables)); | _ => if (pred->variablep() && member(pred,tables)) res = res->cons(pred); #end; return res; }; /* true if there is an equality predicate that binds an attribute of a table * in left_tables with an attribute of a table in right_tables. * This will imply that the join between these tables will be an equijoin */ bool equijoinp ( Expr pred, list* left_tables, list* right_tables ) { #case pred | and(...r) => for(; r->consp(); r=r->tl) #case r->hd | eq(`x,`y) => if (refers_to(r->hd,left_tables) && refers_to(r->hd,right_tables)) return true; #end; #end; return false; }; /* finds an equality predicate from pred that associates attributes of the tables * in left_tables with attributes of tables in right_tables (if there is one). * Then it returns the attributes of the tables in the left_table that are involved * in this predicate. These attributes determine the order in which the left input * of a merge join must be sorted */ Expr required_order ( Expr pred, list* left_tables, list* right_tables ) { list* ords = Nil; #case pred | and(...r) => for(; r->consp(); r=r->tl) #case r->hd | eq(`x,`y) => if (refers_to(r->hd,left_tables) && refers_to(r->hd,right_tables)) ords = get_order(r->hd,left_tables); #end; #end; return #; }; /* splits the predicate pred into three parts: it returns the * predicate and(and(...left),and(...right), and(...rest)), where left * holds all predicates that refer to the left_tables exclusively; right * holds all predicates that refer to the right_tables exclusively; and * rest holds all predicates that refer to both left and right tables */ Expr split_predicate ( Expr pred, list* left_tables, list* right_tables ) { list* lefts = Nil; list* rights = Nil; list* rests = Nil; #case pred | and(...r) => for(; r->consp(); r=r->tl) { bool bl = refers_to(r->hd,left_tables); bool br = refers_to(r->hd,right_tables); if (bl && br) rests = rests->cons(r->hd); else if (bl) lefts = lefts->cons(r->hd); else if (br) rights = rights->cons(r->hd); else rests = rests->cons(r->hd); }; return #; #end; }; /* true if the expected order is at least as strong as the computed order. * expected_order = computed_order plus something */ bool subsumes ( Expr computed_order, Expr expected_order ) { #case computed_order | order(...co) => #case expected_order | order(...eo) => { list* s = eo; for(list* r = co; r->consp(); r=r->tl, s=s->tl) if (s->nullp()) return false; else if (!r->hd->eq(s->hd) && !r->hd->eq(#hd))>) && !#hd))>->eq(s->hd)) return false; return true; }; | _ => return false; #end; | _ => return false; #end; }; /* Retrieves statistics and indexes from the meta-database */ int table_cardinality ( Expr table ) { return find_extent(table->name())->cardinality; }; int table_size ( Expr table ) { return find_extent(table->name())->object_size * find_extent(table->name())->cardinality; }; Expr find_index ( Expr table, Expr order ) { Names ns = new list; for(list* r = order->arguments(); r->consp(); r=r->tl) #case r->hd | project(_,`n) => ns = ns->cons(n->name()); #end; return find_extent(table->name())->find_index(ns->reverse()); }; /* selectivity estimation; needs lots of work */ float selectivity ( Expr pred ) { float sel = 1.0; for(list* r = pred->arguments(); r->consp(); r=r->tl) #case r->hd | eq(project(`x,`a),`v) => sel = sel/10.0; | eq(`v,project(`x,`a)) => sel = sel/10.0; | _ => sel = sel/100.0; #end; return sel; }; /* the cost of accessing an index (B-tree) */ float log_cost ( int data_blocks ) { if (data_blocks <= 1) return 1.0; return ceil(log(data_blocks)); }; /* the cost of accessing an index to satisfy a required order io and a predicate pred */ float index_access_cost ( Expr table, Expr pred, list* io ) { float size = table_size(table); for(list* r = pred->arguments(); r->consp(); r=r->tl) #case r->hd | eq(project(`x,`a),`v) => if (member(#,io)) size = size/10.0; | eq(`v,project(`x,`a)) => if (member(#,io)) size = size/10.0; | `f(project(`x,`a),`v) => if (member(#,io)) size = size/2.0; | `f(`v,project(`x,`a)) => if (member(#,io)) size = size/2.0; #end; return size + log_cost(table_size(table)); }; /* returns all indexes attached to table ranged by the range variable name */ list* all_table_indexes ( Expr table, Expr name ) { binding* indexes = new binding; list* res = Nil; for(Names n = indexes->names(); n->consp(); n=n->tl) { list* idx = Nil; for(list* r = indexes->find(n->hd)->arguments()->reverse(); r->consp(); r=r->tl) idx = idx->cons(#hd))>); res = res->cons(#hd)),order(...idx))>); }; return res; }; %{ /* set this to true if you want to get a tracing of the rule system; lots of output */ #define trace_rules false /* set this to true if you want to print the results of every optimization module */ #define trace_stages true %trace { 0 } /* inherited attributes */ %inherited order, /* the expected order (for the physical stage) */ inner /* variables to group_by in a nest operation (Stage 3) */ /* synthesized atttributes: */ %synthesized { int size; /* the collection cardinality */ float cost; /* the plan cost */ Expr order; /* the computed (real) order */ } = { 0, 10000.0, # } /******************** LOGICAL RULES ***********************************/ /* Stage 1: Normalization of comprehensions */ %logical /* projecting over a tuple is selecting a tuple element */ project(tuple(...r,bind(`b,`e),...s),`a) : a->eq(b) => `e; /* if a generator iterates over an empty collection, return a zero */ compr(`M,`e,...q,iterate(`v,zero(`N)),...s) => zero(`M); /* if a generator iterates over an singleton collection, make the iterator a binding */ compr(`M,`e,...q,iterate(`v,unit(`N,`u)),...s) => let Expr ne = subst_expr(v,u,e), list* ns = subst_list(v,u,s) in compr(`M,`ne,...q,...ns); /* if a generator iterates over a union, split the comprehension */ compr(`M,`e,...q,iterate(`v,merge(`N,`a,`b)),...s) : commutative(M) || q->nullp() => merge(`M,compr(`M,`e,...q,iterate(`v,`a),...s), compr(`M,`e,...q,iterate(`v,`b),...s)); /* unnesting nested comprehensions */ compr(`M,`e,...q,iterate(`v,compr(`N,`u,...r)),...s) => let Expr ne = subst_expr(v,u,e), list* ns = subst_list(v,u,s) in compr(`M,`ne,...q,...r,...ns); /* unnesting nested comprehensions */ compr(`M,`e,...q,compr(some,`pred,...r),...s) : idempotent(M) => compr(`M,`e,...q,`pred,...r,...s); /* Stage 2: Normalization of predicates (DeMorgan's laws etc) */ %logical compr(`M,`e) => unit(`M,`e); compr(`M,`e,...r) => Compr(`M,`e,binds(),and(),...r); Compr(`M,`e,binds(...r),`pred,iterate(`v,`u),...s) => Compr(`M,`e,binds(...r,iterate(`v,`u)),`pred,...s); Compr(`M,`e,`bs,and(...r),and(...t),...s) => Compr(`M,`e,`bs,and(...r,...t),...s); Compr(`M,`e,`bs,and(...r),`p,...s) => Compr(`M,`e,`bs,and(...r,`p),...s); Compr(`M,`e,`bs,and(...r,and(`x,`y),...s)) => Compr(`M,`e,`bs,and(...r,`x,`y,...s)); Compr(`M,`e,`bs,and(...r,not(and(`x,`y)),...s)) => Compr(`M,`e,`bs,and(...r,or(not(`x),not(`y)),...s)); Compr(`M,`e,`bs,and(...r,not(or(`x,`y)),...s)) => Compr(`M,`e,`bs,and(...r,not(`x),not(`y),...s)); Compr(`M,`e,`bs,and(...r,not(`f(`x,`y)),...s)) : member(f,comparisons) => let Expr g = variable(negated_comparisons->find(f->name())) in Compr(`M,`e,`bs,and(...r,`g(`x,`y),...s)); Compr(`M,`e,`bs,and(...r,or(`x,`y),...s)) : commutative(M) => merge(`M,Compr(`M,`e,`bs,and(...r,`x,...s)), Compr(`M,`e,`bs,and(...r,`y,...s))); /* Stage 3: Translation into joins and unnests; unesting nested queries */ %logical /* the first iterator of an outer join becomes an extent traversal */ { inner = vars(); } Compr( `M, `e<-{ inner=vars(`v,...(tables(r))); }, binds(iterate(`v,`extent),...r), `pred<-{ inner=vars(`v,...(tables(r))); } ) : extent->variablep() => #case split_predicate(pred,Cons(v,Nil),tables(r)) | and(`left,and(...right),and(...rest)) => compile(compr(`M,`e,binds(...r),and(...right,...rest)), get(`M,`extent,`v,`left)); #end; /* ... but if this is inner comprehension, translate the first iterator into a nested query (to be unnested later) */ { inner = vars(`v,...vs); } Compr( `M, `e, binds(...r), `pred ) => let Expr nv = new_variable() in nested_query(`nv,compile(compr(`M,`e,binds(...r),`pred),`nv)); /* for either inner or outer comprehension, translate the other iterators into a join, if it ranges over an extent ... */ { inner = vars(...vs); } compile(compr(`M,`e,binds(iterate(`v,`extent),...r),`pred),`E) : extent->variablep() => #case split_predicate(pred,Cons(v,Nil),tables(r)->append(range_vars(E))->append(vs)) | and(`left,`right,`rest) => let Expr keep = vs->consp() ? # : # in compile(compr(`M,`e,binds(...r),`right), join(`M,get(`M,`extent,`v,`left),`E,`rest,`keep)); #end; /* ... or into an unnest, if it ranges over a nested set */ { inner = vars(...vs); } compile(compr(`M,`e,binds(iterate(`v,`path),...r),`pred),`E) => #case split_predicate(pred,Cons(v,Nil),tables(r)->append(vs)) | and(`left,and(...right),and(...rest)) => let Expr keep = vs->consp() ? # : # in compile(compr(`M,`e,binds(...r),and(...right,...rest)), unnest(`M,`E,`v,`path,`left,`keep)); #end; /* nested queries in the comprehension predicate are unnested (they inserted into the main body) and then a Nest operator is used to do the right nesting of values */ { inner = vars(...vs); } compile(compr(`M,`e,binds(), `f[nested_query(`x,reduce(`N,`U,`v,`hd,`pred))]), `E) => let Expr new_U = subst_expr(x,E,U), list* gvars = groupby_vars(E)->append(vs) in compile(compr(`M,`e,binds(),`f[`v]), Nest(`N,`new_U,`v,`hd,vars(...gvars),vars(),`pred)); /* the same for a nested query in the comprehension head */ { inner = vars(...vs); } compile(compr(`M,`f[nested_query(`x,reduce(`N,`U,`v,`hd,`pred))], binds(),and()), `E) => let Expr new_U = subst_expr(x,E,U), list* gvars = groupby_vars(E)->append(vs) in compile(compr(`M,`f[`v],binds(),and()), Nest(`N,`new_U,`v,`hd,vars(...gvars),vars(),`pred)); /* the outer comprehension is translated into a reduction */ compile(compr(`M,`e,binds(),`pred),`E) => let Expr nv = new_variable() in reduce(`M,`E,`nv,`e,`pred); /* shortcut: if we join the same domain and then nest over the right domain, then simply nest the right domain. This is used when translating OQL group stmts */ Nest( `M, join( `N, `e1, `e2, and(...r,eq(`p1,`p2),...s), right ), `v, `x, vars(`y), `ov,and(...ps) ) : x->variablep() && y->variablep() && subst_expr(x,y,e1)->eq(e2) && ( subst_expr(x,y,p1)->eq(p2) || subst_expr(x,y,p2)->eq(p1) ) => let Expr by = subst_expr(x,y,p1), Expr nv = new_variable() in Nest( `M, map(`M,`e2,`nv,`by), `v, `y, vars(`nv), `ov, and(...ps,...r,...s) ); /* the same but for the left domain */ Nest( `M, join( `N, `e1, `e2, and(...r,eq(`p1,`p2),...s), left ), `v, `x, vars(`y), `ov, and(...ps) ) : x->variablep() && y->variablep() && subst_expr(x,y,e2)->eq(e1) && ( subst_expr(x,y,p1)->eq(p2) || subst_expr(x,y,p2)->eq(p1) ) => let Expr by = subst_expr(x,y,p1), Expr nv = new_variable() in Nest( `M, map(`M,`e1,`nv,`by), `v, `y, vars(`nv), `ov, and(...ps,...r,...s) ); /* Stage 4: materialization of path expressions */ %logical /* simple form of materialization of a path x.A.B in a predicate of a get operation */ get(`M,`tbl,`v,and(...r,`f[project(project(`x,`A),`B)],...s)) : v->eq(x) && extentp(type_of(#, range_variables(#))) => let Expr extent = get_extent(type_of(#, range_variables(#))), Expr nv = new_variable(), Expr pred = #<`f[project(`nv,`B)]>, bool inp = occurs(v->name(),pred), Expr gpred = inp ? # : #, Expr jpred = inp ? # : # in join( bag, get(`M,`tbl,`v,and(...r,...s)), get(bag,`extent,`nv,`gpred), `jpred, none ); /* if we have already materialize the path x.A.B, then don't do that again */ `op(...r,`f[project(project(`x,`A),`B)],...s) : x->variablep() => #case #<`op(...r,`f[0],...s)> | `g[eq(OID(`y),OID(`nv))] : y->eq(#) => `op(...r,`f[project(`nv,`B)],...s); #end; /* otherwise, materialize x.A.B into a pointer join between e (that generates x) and the extent of x.A */ `op( ...r, `e, ...s, `f[project(project(`x,`A),`B)], ...t ) : x->variablep() && range_variables(e)->in(x->name()) && extentp(type_of(#,range_variables(e))) => let Expr extent = get_extent(type_of(#, range_variables(e))), Expr nv = new_variable() in `op( ...r, join( bag, `e, get(bag,`extent,`nv,and()), and(eq(OID(project(`x,`A)),OID(`nv))), left ), ...s, `f[project(`nv,`B)], ...t ); /* otherwise, translate Nest into nest */ Nest(`M,`e,`v,`hd,vars(...gvars),`ov,`pred) => nest(`M,`e,`v,`hd,vars(...(set_union(gvars,all_materialized_vars(e)))),`ov,`pred); /* Stage 5: join permutation */ %logical /* eliminate trivial selections */ select(`e,and()) => `e; /* fuse cascaded selections */ select( select(`e,and(...r)), and(...s) ) => select(`e,and(...r,...s)); /* fuse a selection with a join */ selection(join(`M,`x,`y,and(...p1),`k),and(...p2)) => join(`M,`x,`y,and(...p1,...p2),`k); /* fuse a selection with a get */ selection(get(`M,`table,`v,and(...p1)),and(...p2)) => get(`M,`table,`v,and(...p1,...p2)); /* fuse a selection with a reduce */ selection(reduce(`M,`e,`v,`hd,and(...p1)),and(...p2)) => reduce(`M,`e,`v,`hd,and(...p1,...p2)); /* fuse a selection with a nest */ selection(nest(`M,`e,`v,`hd,`gv,`ov,and(...p1)),and(...p2)) => nest(`M,`e,`v,`hd,`gv,`ov,and(...p1,...p2)); /* fuse a selection with a uunest */ selection(unnest(`M,`e,`v,`path,and(...p1),`k),and(...p2)) => unnest(`M,`e,`v,`path,and(...p1,...p2),`k); /* permute unnests */ unnest(`M,unnest(`N,`e,`x,`path1,`pred1,`k1),`y,`path2,`pred2,`k2) : commutative(M) && commutative(N) && !path_root(path2)->eq(x) = unnest(`N,unnest(`M,`e,`y,`path2,`pred2,`k2),`x,`path1,`pred1,`k1); /* commutativity rule */ join(`M,`x,`y,`pred,`k) : commutative(M) = let Expr keep = k->eq(#) ? k : ( k->eq(#) ? # : # ) in join(`M,`y,`x,`pred,`keep); /* associativity rule */ join(`N,join(`M,`x,`y,and(...p1),`k1),`z,and(...p2),`k2) : commutative(M) && commutative(N) && k1->eq(k2) = #case split_predicate(#,range_vars(x), Append(range_vars(y),range_vars(z))) | and(`left,`right,`rest) => join(`N,selection(`x,`left), join(`M,`y,`z,`right,`k1),`rest,`k1); #end; /* -------------------- PHYSICAL RULES -------------------------- */ %physical /* selection can be done during TABLE_SCAN */ { order=`ord; } get(`M,`table,`name,`pred) : subsumes(ord,#) = TABLE_SCAN(`table,`name,`pred) : { size = int(table_cardinality(table)*selectivity(pred)); cost = table_size(table); order = #; }; /* selection can be done during INDEX_SCAN */ { order=order(`o,...os); } get(`M,`table,`name,`pred) = #case find_index(table,#) | index(`idx,order(...io)) => INDEX_SCAN(`table,`name,`pred,`idx,order(...io)) : { size = int(table_cardinality(table)*selectivity(pred)); cost = index_access_cost(table,pred,io); order = #; }; #end; /* if requested order is empty then use any index; so here we use ALL indexes */ { order=order(); } get(`M,`table,`name,`pred) = #forall r in all_table_indexes(table,name) do #case r | index(`idx,order(...io)) => INDEX_SCAN(`table,`name,`pred,`idx,order(...io)) : { size = int(table_cardinality(table)*selectivity(pred)); cost = index_access_cost(table,pred,io); order = #; }; #end; #end; /* if an non-empty order is requested then SORT the plan x; of course now */ /* the required order for x should be empty so this rule is applied only once */ { order=order(`o,...os); } (`x<-{ order=order(); }) : is_plan(x) && !subsumes(#,^x.order) = SORT(`x,order(`o,...os)) : { size = ^x.size; cost = ^x.cost+(^x.size*log_cost(^x.size)); order = #; }; /* convert join into a nested loop; the required order propagates to the left input only */ { order=`ord; } join( `M, `x<-{ order=`ord; }, `y<-{ order=order(); }, `pred, `keep ) : refers_to(ord,range_vars(x)) = NESTED_LOOP(`x,`y,`pred,`keep) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size*^y.size); order = ^x.order; }; /* Otherwise, it's a regular nested loop */ { order=order(); } join( `M, `x, `y, `pred, `keep ) = NESTED_LOOP(`x,`y,`pred,`keep) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size*^y.size); order = ^x.order; }; /* convert join into a merge join; both inputs are required to be sorted */ { order=`ord; } join( `M, `x<-{ order=`(required_order(pred,range_vars(x),range_vars(y))); }, `y<-{ order=`(required_order(pred,range_vars(y),range_vars(x))); }, `pred, `keep ) : commutative(M) && equijoinp(pred,range_vars(x),range_vars(y)) && subsumes(ord,^x.order) = MERGE_JOIN(`x,`y,`pred,`keep, `(required_order(pred,range_vars(x),range_vars(y))), `(required_order(pred,range_vars(y),range_vars(x)))) : { size = int((^x.size+^y.size)*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size+^y.size); order = ^x.order; }; /* GROUP BY needs its input sorted; it scans the input only once */ { order=`ord; } nest( `M, `e<-{ order=order(...glist); }, `v, `hd, vars(...glist), vars(...r), `pred ) : commutative(M) && subsumes(ord,^e.order) = let list* other_nestvars = r->append(all_nesting_vars(e)) in GROUP_BY(`M,`e,`v,`hd,vars(...glist),vars(...other_nestvars),`pred ) : { size = ^e.size; cost = ^e.cost+^e.size; order = ^e.order; }; /* NEST is less efficient than GROUP_BY: it is like a nested loop over itself */ { order=order(); } nest( `M, `e, `v, `hd, `gv, vars(...r), `pred ) = let list* other_nestvars = r->append(all_nesting_vars(e)) in NEST(`M,`e,`v,`hd,`gv,vars(...other_nestvars),`pred ) : { size = ^e.size; cost = ^e.cost+(^e.size*^e.size); order = #; }; { order=order(); } reduce(`M,`e,`v,`head,`pred) = REDUCE(`M,`e,`v,`head,`pred) : { size = ^e.size; cost = ^e.cost+^e.size; order = ^e.order; }; { order=order(); } merge(`M,`x,`y) = MERGE(`M,`x,`y) : { size = ^x.size+^y.size; cost = ^x.cost+^y.cost+^x.size+^y.size; order = #; }; { order=order(); } unnest( `M, `e, `v, `path, `pred, `keep ) = UNNEST( `e, `v, `path, `pred, `keep ) : { size = ^e.size; cost = ^e.cost+^e.size; order = ^e.order; }; { order=`ord; } map( `M, `e<-{ order=`ord; }, `v, `f ) : subsumes(ord,^e.order) = MAP( `e, `v, `f ) : { size = ^e.size; cost = ^e.cost+^e.size; order = ^e.order; }; %} /*--------------------------------------------------------------------------------*/ bool better_plan ( plan* x, plan* y ) { return x->attributes.cost < y->attributes.cost; }; /* returns the 5 best plans from a list of plans */ list* ten_best_plans ( list* plans ) { list* res = new list; short i = 0; for(list* r = plans->sort(better_plan); i<1 && r->consp(); r=r->tl, i++) res = res->cons(r->hd); return res; }; /* optimize an OQL query */ Expr optimize_query (Expr query) { optimizer_init(); /* initial inherited attribute */ inherited init; init.inner = #; init.order = #; /* set this to false if you want to generate algebraic expressions only */ bool generate_physical_plans = true; /* best_plans is a pointer to a function that filters out plans during the beam serach */ best_plans = &ten_best_plans; list* plans = rewrite(query,init,generate_physical_plans); return ten_best_plans(plans)->hd->expression; };