Wed Jul 16 13:33:09 PDT 1997 Maintenance =========== Serge brought up a good point last week. The problem is that in a batch of updates it is necessary to either: 1. Only execute a single change at a time 2. Keep the old state of the database along with the new state of the database around. Option (1) will cause serious problems with the run time of incremental maintenance, while option (2) requires a new and old state. Detailed View Maintenance Algorithm =================================== // There are three different changes that we are concerned with enum changeType { addEdge, deleteEdge, changeAtomic }; // For our purposes a Query is just a struct containing all the pieces of // an update query struct Query { char* update; // String representation of update part of statment char* select; // ... select char* from; // ... from char* where; // ... where char* with; // ... with }; // Changes are represented by the following datastructure struct Change { ChangeType type; // The type of change Oid oid; // The OID of the object that was changed Value newValue; // New value after change had been applied Value oldValue; // Value before change was applied OidList matchedPath; // Path taken to get to object that was changed }; // Obviously there should be more here, but for our purposes we are only // concerned with the (single!) query used to define the view and the // "interesting" oids. struct view { Query viewSpec; // The query used to define the view Set interestingOids; // Set of interesting objects Set interestingLabels; // Set of interesting labels // Recall that "white" objects are those that are in the select clause // of the view and "black" objects are those that are in the view as a // result of the With clause Set white; // White zone objects in view Set black; // Black zone objects in view }; // Maintain will take a set of views and a set of changes and first // determine which views need to change, then determine the most efficient // way to perform the updates and then reconcile each view. void Maintain (Set views, Set changes) { // For each defined view for each v in views // Compute the list of queries that would be necessary to // incrementally maintain the view List *updateQ = ComputeIncremental (v, Relevant(v, changes); // Determine which is more efficient: the set of update queries or // recomputing the view via the original view specification if (MoreEfficient (updateQ, v.viewSpec)) Execute (updateQ); else Execute (new List(v.viewSpec)); } } // Relevant will take a single view and the set of changes and determine // whether any of the changes to the source database could have had an // affect on the view Set Relevant (View v, Set changes) { Set newchanges = empty; for each change in changes { if change.type == addEdge or deleteEdge { if (change.oid is in v.interestingOids and change.label is in v.interestingLabels ) put change in newchanges; } else if (change.oid is in v.interestingOids) put change in newchanges; } return newchanges; } // ComputeIncremental will accept a single view and a set of changes and // compute the List (order is important) of update queries to be applied // to the view that will reconcile the view with the source List ComputeIncremental (View v, Set changes) { // The following will work only in the absence of both negation // AND oid-inequalities in the view definition A // The return value is initialized to empty List rv; // For each change in the set -- easy to change this to // process changes in batches of "compatible" changes for each change in changes Query q // We assume that the oid of a change is the oid of the //object it refers to switch (change.type) { case addEdge: // First locate the variable(s) 'x' that corresponds to the // variable in the view specification and change For each variable, x, matching v.viewSpec and change { q.update = "WHITE += "; q.select = v.viewSpec.select; // We cannot always change the from clause into: // "(a.string intersect {change}) as x" since the variable // may appear in the where. Safest thing to do is just add // a new clause to the where, which covers all cases q.from = v.viewSpec.from; q.where = v.viewSpec.where; q.with = ""; // Here the x will have to be determined, but it would have // to be the variable that was add q.where += " and ( x == ", change.oid, " ) "; allUnioned = allUnioned + " union " + q; } rv = rv + allUnioned; case deleteEdge: // First locate the variable(s) 'x' that corresponds to the // variable in the view specification and change For each variable, x, matching v.viewSpec and change { q.update = "WHITE -= "; q.select = v.viewSpec.select; q.from = v.viewSpec.from; if (x is in v.viewSpec.from) { // ** We need to add something that says if x is the same // as the changes object then use the old objects // value } q.with = ""; q.where += " and ( x == ", change.oid, " ) "; // Finally, we must add another clause that checks to see // if the object is within the view for another reason q.where += " and ( ", FreshVariables(v.viewSpec.select); // We cannot directly use the same from clause because then // we wouldn't be able to access the same variables in the // scope above, so Permute them, but remeber the permutation q.where += FreshVariables (v.viewSpec.from); q.where += FreshVariables (v.viewSpec.where); // The z variable is the one which appears in the select q.where += " and (z' == z) ) = emptyset " allUnioned = allUnioned + " union " + q; } rv = rv + allUnioned; break; case changeAtomic: Query q1, q2; // This will probably be the most costly aspect (if we // cannot optimize), but the changed object could impact any // of the variables used in the where clause, so here we // have to consider them all: For each variable, v, in Where clause and for the variable in the select clause { // Caveat here is that we might add the same object 2! // But this is okay since we have sets q1.update = "WHITE += "; q1.select = v.viewSpec.select; q1.from = v.viewSpec.from; q1.where = v.viewSpec.where; q1.with = ""; q1.where += " ( and x == ", change.oid, " ) "; allq1s = allq1s + " union " + q1; q2.update = "WHITE -= "; q2.select = v.viewSpec.select; q2.from = v.viewSpec.from; q2.where = " ( ", v.viewSpec.where, " ) == FALSE "; q2.with = ""; // Here the x will have to be determined, but it would have // to be the variable corresponding to the object that was // changed q2.where += " and ( x == ", change.oid, " ) "; // Also have to check to see if this object was here for // another reason ala delete edge. q.where += " and ( ", FreshVariables(v.viewSpec.select); // We cannot directly use the same from clause because then // we wouldn't be able to access the same variables in the // scope above, so Permute them, but remeber the permutation q.where += FreshVariables (v.viewSpec.from); q.where += FreshVariables (v.viewSpec.where); // The z variable is the one which appears in the select q.where += " and (z' == z) ) = emptyset " allq2s = allq2s + " union " + q2; } rv = rv + allq1s + allq2s; break; } } // ** Need to define ADD_WHITE and DELETE_WHITE rv += ComputeIncrementalBlack (v, ADD_WHITE, DELETE_WHITE); return rv; } // MoreEfficient will return TRUE if the list of incremental queries is // more efficient then reevaluating the original view specification Boolean MoreEfficient (List updateQ, Query originalView) { // ** Janet to fill in function ** if (updateQ < originalView) return TRUE; else return FALSE; } // Execute will accept a List of Queries and execute each one in turn. // Here order is important since for the incremental queries I must do all // the "white" queries before any of the "black" void Execute (Set queries) { for q next in queries Run Query q; } // Pseudocode for the "black zone" // The assumption is that instead of just changing WHITE in place, we // create ADD_WHITE and DEL_WHITE sets of objects that actually have to // be deleted or inserted in the view. // // In the case where the select has no object creation, ie it only // selects one variable, then all the ADD_WHITE and DEL_WHITE // objects are valuations for this one variable. In particular: // The Lorel update language breaks down for updating the with part of // our view specification, since it doesn't allow us to add an object and an // edge to that object at the same time. We could do it one at a time (ie, // write two queries per object added due to the with, one to add // the object itself and one to "connect" it through the label), // but that doesn't generalise at all to batch processing, so we // have to come up with a language feature. I'm just fudging the // issue here, the pseudocode is really pseudo and it also // generates pseudo-Lorel. But I think it does the right thing. // The procedure takes the set of additions and deletions to WHITE // and the set of changes and computes for each triple // // the set of triples // This is done in order, since changes to some BLACK sets cause changes in // other BLACK sets // // This is my way of fudging the issue of actually specifying labels // as part of the output of our update queries, since they have to be // part of the changes that the view finally has to install. // I am not sure exactly how to write this, but we can "preprocess" the // 'with' clause, so that it tells us which variable depends on which. // It is true that each variable depends only on one "previous" variable // since we only have simple paths, ie in // with A.B X, X.C Y, X.D W, W.E T // X depends on WHITE and B edges, Y depends on BLACK_X and C_edges, // W depends on BLACK_X and D edges and T depends on BLACK_W and E edges. **************************/ List ComputeIncrementalBlack (View v, Set ADD_WHITE, Set NEW_WHITE) { // The return value is initialized to empty List rv; For each "expression" A.label B (triple in the above comment) in the with clause do { // We deal with deletions first q_i.update = " DEL_BLACK_i ="; // I assume that if A.label doesn't exist, then the triple is not // created, right? q_i.select = "A, label, B"; // With one change for A: "A in RelevantSet(A)"; // The function RelevantSet returns the name of the set among DEL_WHITE, // and DEL_BLACK_j, j <= i that A "depends on". q_i.from = v.viewSpec.from; q_i.where = "" q_i.with = ""; rv = rv + q_i // Only additions and deletions of edges are relevant to the with // clause, not atomic value updates q1_i.update = " DEL_BLACK_i ="; q1_i.select = "A, label, B"; q1_i.from = v.viewSpec.from; q1_i.where = "A.label.oid == del_edge_changes.oid " // We are working with the labels of changes here, like in // deleteEdge above, so A.label exists q1_i.with = ""; rv = rv + q1_i // Now add the only edge additions that are interesting are the ones // referring to labels appearing IN THE WITH same for deletions above q2_i.update = " ADD_BLACK_i ="; q2_i.select = "A, label, B"; q2_i.from = v.viewSpec.from; q2_i.where = "A.label.oid == add_edge_changes.oid " q2_i.with = ""; rv = rv + q2_i q'_i.update = " ADD_BLACK_i ="; q'_i.select = "A, label, B"; // With one change for A: "A in RelevantSet(A)"; q'_i.from = v.viewSpec.from; q'_i.with = ""; rv = rv + q'_i // We need to pay attention to how we actually use the BLACK_i sets: // it could be that one "with" object is supposed to be deleted from // the view, because one of its edges is deleted, but another edge // is still there, or gets created or whatever. The standard semantics // for deletion of objects I think guarantee correctness in these // scenarios, but we should be careful in how we delete. } } /* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Questions to answer (in order of presumed priority) =================== -- How does the algorithm change if the select is more complex, in particular, if it just constructs a new oem by piecing together variables (select X,Y,Z), (no nested subqueries in the select etc) -- How does the algorithm change in the presence of negation and/ or oid-inequalities -- What can we propose to do about complex path expressions (if anything) (it'll have to be, like everything else, an execution option, that the optimizer will evaluate using the cost model Janet will come up with) -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */