How to linearize a binary and-or graph in C#?

I'm trying to 'linearize' every possibility of a binary and-or tree to make it easier to read. Every possibility should be added to the following structure:
// (x1 AND x2) OR (x2 AND x3)
List<List<Node>> possibilities = new List<List<Node>>() {
    new List<Node> { x1, x2 },
    new List<Node> { x2, x3 }
};
I'm having difficulties generating the list-based possibilities from a tree structure. A simplified version of my algorithm, which doesn't return a correct answer in many cases, is:
class TreeDecomposer {
    public List<TreePath> Possibilities = new List<TreePath>();
    // TreePath = { List<TreeNode> path, bool IsAdded }

    public TreeDecomposer(AbstractTree tree) {
        DecomposeTree(tree, new TreePath());
    }

    public void DecomposeTree(AbstractTree tree, TreePath path)
    {
        // Add the path to the list of possibilities
        if (!path.IsAdded)
        {
            Possibilities.Add(path);
            path.IsAdded = true;
        }
        // Recursive browse
        if (tree is TreeConnector) {
            TreeConnector treeConnector = (TreeConnector)tree;
            if (treeConnector.Connection == "&")
            {
                DecomposeTree(treeConnector.LeftTree, path);
                DecomposeTree(treeConnector.RightTree, path);
            }
            else if (treeConnector.Connection == "|")
            {
                TreePath clonedPath = (TreePath)path.Clone(); // deep clone
                DecomposeTree(treeConnector.LeftTree, path);
                DecomposeTree(treeConnector.RightTree, clonedPath); // somehow the 'or' operator multiplies possibilities by two?
            }
        }
        // Leaf
        else if (tree is TreeValue) {
            TreeValue treeValue = (TreeValue)tree;
            path.Add(treeValue);
        }
    }
}
I need help finding the correct algorithm, working with my tree structure, to traverse the tree and construct every possible 'AND-path'.
Two basic examples:
Binary and-or tree example (1)
Formula: (a | b) & (c | d)
Possibilities:
{
{a, c}, // or {c, a}, the order doesn't matter
{a, d},
{b, c},
{b, d}
}
Binary and-or tree example (2)
Formula: a & ((b | c) & d)
Possibilities:
{
{a, b, d}, // or {d, b, a}, the order doesn't matter
{a, c, d}
}
Tree structure:
The implementation of the Tree structure is the following:
abstract class AbstractTree {}

class TreeConnector : AbstractTree
{
    public string Connection; // '&' or '|'
    public AbstractTree LeftTree;
    public AbstractTree RightTree;
}

class TreeValue : AbstractTree
{
    public string Data; // 'a', or 'b', ...
}
Thanks a lot for your help.

Based on @Freggar's link, here is a simplified implementation of the 'OR' distribution. It's probably not done in the most efficient way, but it gives a general idea of what I was looking for.
/*
TreePath = {
HashSet<TreeNode> path,
bool IsAdded // reset to false when an instance is cloned, even if it was true
}
Every class (Tree...) defines the methods:
public object Clone()
public bool Equals(T typedObj)
public override bool Equals(object obj)
public override int GetHashCode()
*/
enum TreeBranch
{
Unknown,
Left,
Right
}
class TreeDecomposer {
public List<TreePath> Possibilities = new List<TreePath>();
public TreeDecomposer(AbstractTree tree)
{
DecomposeTree(null, tree, TreeBranch.Unknown, new TreePath());
RemoveDuplicatePaths();
}
public void DecomposeTree(AbstractTree parentNode, AbstractTree node, TreeBranch branch, TreePath path)
{
if (!path.IsAdded)
{
Possibilities.Add(path);
path.IsAdded = true;
}
// Recursive browse
if (node is TreeConnector) {
TreeConnector treeConnector = (TreeConnector)node;
if (treeConnector.Connection == "&")
{
DecomposeTree(treeConnector, treeConnector.LeftTree, TreeBranch.Left, path);
DecomposeTree(treeConnector, treeConnector.RightTree, TreeBranch.Right, path);
}
else if (treeConnector.Connection == "|")
{
// In this case, parentNode is a TreeConnector
if (parentNode != null)
{
// Left distribution
TreePath clonedPathLeftDistribution = (TreePath)path.Clone();
TreeConnector parentTreeConnectorLeftDistribution = (TreeConnector)parentNode.Clone();
// Right distribution
TreePath clonedPathRightDistribution = (TreePath)path.Clone();
TreeConnector parentTreeConnectorRightDistribution = (TreeConnector)parentNode.Clone();
if (branch == TreeBranch.Left)
{
parentTreeConnectorLeftDistribution.LeftTree = treeConnector.LeftTree;
parentTreeConnectorRightDistribution.LeftTree = treeConnector.RightTree;
}
else if (branch == TreeBranch.Right)
{
parentTreeConnectorLeftDistribution.RightTree = treeConnector.LeftTree;
parentTreeConnectorRightDistribution.RightTree = treeConnector.RightTree;
}
// Remove obsolete path
Possibilities.Remove(path);
// Browse recursively distributed tree ; the path must be different (by ref) if the parent operator is 'OR'
DecomposeTree(
parentTreeConnectorLeftDistribution,
parentTreeConnectorLeftDistribution.LeftTree,
TreeBranch.Left,
parentTreeConnectorLeftDistribution.Connection == "|"
? (TreePath)clonedPathLeftDistribution.Clone()
: clonedPathLeftDistribution
);
DecomposeTree(
parentTreeConnectorLeftDistribution,
parentTreeConnectorLeftDistribution.RightTree,
TreeBranch.Right,
clonedPathLeftDistribution
);
DecomposeTree(
parentTreeConnectorRightDistribution,
parentTreeConnectorRightDistribution.LeftTree,
TreeBranch.Left,
parentTreeConnectorLeftDistribution.Connection == "|"
? (TreePath)clonedPathRightDistribution.Clone()
: clonedPathRightDistribution
);
DecomposeTree(
parentTreeConnectorRightDistribution,
parentTreeConnectorRightDistribution.RightTree,
TreeBranch.Right,
clonedPathRightDistribution
);
}
// The operator is the root of the tree; we simply divide the path
else
{
TreePath clonedLeftPath = (TreePath)path.Clone();
TreePath clonedRightPath = (TreePath)path.Clone();
// Remove obsolete path
Possibilities.Remove(path);
DecomposeTree(treeConnector, treeConnector.LeftTree, TreeBranch.Left, clonedLeftPath);
DecomposeTree(treeConnector, treeConnector.RightTree, TreeBranch.Right, clonedRightPath);
}
}
}
// Leaf
else if (node is TreeValue) {
TreeValue treeValue = (TreeValue)node;
path.Add(treeValue);
}
}
public void RemoveDuplicatePaths()
{
Possibilities = Possibilities.Distinct().ToList();
}
}
Note:
Here, I want to keep only the unique possibilities; that's why I use HashSet instead of List:
"a & a & b" => "a & b"
The method RemoveDuplicatePaths is used to remove duplicated combinations:
"a & b" and "b & a" are both the same possibility (regarding the truth value)

Related

Compare two lists of nodes with total match

I create two lists of objects, but cannot do a total match on the values:
var inputNodes = new List<node>()
{
new node() { nodeName= "D100", DataLength = 1 },
new node() { nodeName= "D101", DataLength = 1 },
new node() { nodeName= "D102", DataLength = 1 },
new node() { nodeName= "D103", DataLength = 1 },
new node() { nodeName= "D104", DataLength = 1 },
new node() { nodeName= "D105", DataLength = 1 },
new node() { nodeName = "D106", DataLength = 1 }
};
var inputNodes2 = new List<node>()
{
new node() { nodeName= "D100", DataLength = 1 },
new node() { nodeName= "D101", DataLength = 1 },
new node() { nodeName= "D102", DataLength = 1 },
new node() { nodeName= "D103", DataLength = 1 },
new node() { nodeName= "D104", DataLength = 1 },
new node() { nodeName= "D105", DataLength = 1 },
new node() { nodeName= "D106", DataLength = 1 }
};
I tried var isEqual = inputNodes.SequenceEqual(inputNodes2).
It returns false, and I don't want to use a loop or the List.Select function.
Any idea for that?
It seems to me that you are not familiar with the concept of equality and how you can change the definition of equality to your own definition. Hence I'll explain default equality and how to write an equality comparer that implements your idea of equality.
By default, equality of objects is reference equality: two references are equal if they refer to the same object:
Node A = new Node {...}
Node X = A;
Node Y = A;
Objects X and Y refer to the same object, and thus:
Assert(X == Y)
IEqualityComparer<Node> nodeComparer = EqualityComparer<Node>.Default;
Assert(nodeComparer.Equals(X, Y));
However, in your case inputNodes[0] and inputNodes2[0] do not refer to the same object. Hence they are not equal Nodes, and thus SequenceEqual will return false.
You don't want to use the standard equality comparison; you want a special one. According to your definition, two Nodes are equal if the properties of the Nodes are equal. This definition of equality is called "value equality", as contrasted with "reference equality".
Because you don't want to use the default reference equality, you'll have to write the equality comparer yourself. The easiest way to do this is to derive a class from EqualityComparer<Node>.
public class NodeComparer : EqualityComparer<Node>
{
    public static IEqualityComparer<Node> ValueComparer { get; } = new NodeComparer();
    public override bool Equals(Node x, Node y) {... TODO: implement}
    public override int GetHashCode(Node x) {... TODO: implement}
}
Usage will be as follows:
IEnumerable<Node> inputNodes1 = ...
IEnumerable<Node> inputNodes2 = ...
IEqualityComparer<Node> nodeComparer = NodeComparer.ValueComparer;
bool equalInputNodes = inputNodes1.SequenceEqual(inputNodes2, nodeComparer);
Equals
The definition depends on YOUR definition of equality. You can use any definition you need. In your case, you chose a straightforward "compare by value":
public override bool Equals(Node x, Node y)
{
// The following statements are almost always the same for every equality
if (x == null) return y == null; // true if both null
if (y == null) return false; // because x not null
if (Object.ReferenceEquals(x, y)) return true; // because same object
if (x.GetType() != y.GetType()) return false; // different types
On some occasions, these statements might be different. For example, if you want to create a string comparer where a null string equals an empty string:
string x = null;
string y = String.Empty;
IEqualityComparer<string> stringComparer = MyStringComparer.EmptyEqualsNull;
Assert(stringComparer.Equals(x, y));
Or, if you consider Teachers to be Persons, then in some cases, when you compare a Teacher with a Person, you might not want to check the type.
But all in all, most comparers will use these four initial lines.
Continuing your equality:
return x.NodeName == y.NodeName
&& x.DataLength == y.DataLength;
To be prepared for the future, consider the following:
private static readonly IEqualityComparer<string> nodeNameComparer = StringComparer.Ordinal;
and in your equals method:
return nodeNameComparer.Equals(x.NodeName, y.NodeName)
&& x.DataLength == y.DataLength;
So if in future you want to do a case insensitive string comparison, you only have to change the static declaration of your nodeNameComparer:
private static readonly IEqualityComparer<string> nodeNameComparer = StringComparer.OrdinalIgnoreCase;
GetHashCode
GetHashCode is meant to create a fast method to separate most unequal objects. This is useful, if your Node has two hundred properties, and you know, that if they have equal value for property Id, that very likely all other elements will be equal.
Note that I wrote "very likely". It is not 100% guaranteed that if X has the same hash code as Y, then X will equal Y. But you can be certain of the opposite:
if X has a different hash code than Y, then they will not be equal.
The only requirement for GetHashCode is that if X equals Y, then MyComparer.GetHashCode(X) equals MyComparer.GetHashCode(Y);
If X is not equal to Y, then you don't know whether their hashcodes will be different, although it would be nice if so, because code will be more efficient.
GetHashCode is meant to be fast; it doesn't have to check everything. It is handy if it separates most elements, but it does not have to be a complete equality check.
How about this one:
public override int GetHashCode(Node x)
{
if (x == null) return 874283; // just a number
// for HashCode only use the NodeName:
return x.NodeName.GetHashCode();
}
Or, if you use a string comparer in method Equals for NodeName:
private static readonly IEqualityComparer<string> nodeNameComparer = StringComparer.OrdinalIgnoreCase;
// this comparer is used in Equals
public override int GetHashCode(Node x)
{
if (x == null) return 874283; // just a number
return nodeNameComparer.GetHashCode(x.NodeName);
}
So if in future you change the comparison method for the nodename to CurrentCulture, then both Equals and GetHashCode will use the proper comparer.
Node a = new Node {nodeName= "X", DataLength = 1 };
Node b = new Node {nodeName= "X", DataLength = 1 };
Node c = new Node {nodeName= "X", DataLength = 2 };
Node d = new Node {nodeName= "Y", DataLength = 1 };
It is easy to see that b equals a, while c and d are different from a.
Although c is different, the comparer will return the same hash code for it as for a.
So GetHashCode is not enough for exact equality, but a good GetHashCode will separate most different objects.
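Putting the pieces together, a filled-in version of the comparer sketched above could look like this; a sketch only, assuming the Node class exposes NodeName and DataLength as used in this answer:
public class NodeComparer : EqualityComparer<Node>
{
    // Swap this for StringComparer.Ordinal (or CurrentCulture, ...) to change both Equals and GetHashCode.
    private static readonly IEqualityComparer<string> nodeNameComparer = StringComparer.OrdinalIgnoreCase;

    public static IEqualityComparer<Node> ValueComparer { get; } = new NodeComparer();

    public override bool Equals(Node x, Node y)
    {
        if (x == null) return y == null;                   // true if both null
        if (y == null) return false;                       // because x is not null
        if (Object.ReferenceEquals(x, y)) return true;     // same object
        if (x.GetType() != y.GetType()) return false;      // different types
        return nodeNameComparer.Equals(x.NodeName, y.NodeName)
            && x.DataLength == y.DataLength;
    }

    public override int GetHashCode(Node x)
    {
        if (x == null) return 874283;                      // just a number
        return nodeNameComparer.GetHashCode(x.NodeName);   // NodeName only: fast, separates most nodes
    }
}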
Use an IEqualityComparer<node> like the one below.
class NodeComparer : IEqualityComparer<node>
{
public bool Equals(node? x, node? y)
{
if(x == null && y == null){
return true;
}
if(x == null || y == null)
{
return false;
}
return string.Equals(x.nodeName, y.nodeName) && x.DataLength == y.DataLength;
}
public int GetHashCode([DisallowNull] node obj)
{
return obj.nodeName.GetHashCode() * obj.DataLength.GetHashCode();
}
}
and then use it in SequenceEqual:
inputNodes.SequenceEqual(inputNodes2, new NodeComparer());

Evaluate logic expression contained in a passed in string based on a list of strings

So I am unsure how to word this question properly.
If you look below, let's say I have a list of options (PG, PA, PM, TK, TD) that a customer has ordered. Now let's say I have some expression I need to evaluate against the customer's ordered options, such as PA OR PB, which evaluates to whether the customer's list of options contains either PA or PB. Seems simple enough, but these expressions could grow quite a bit. Below in the code are some good examples of what I would like to accomplish.
I by no means claim to have knowledge about string parsing and evaluating logical operations. I am looking for some general direction on what my options are. I saw some things about dynamic LINQ, rule engines, expression trees, etc. It's just a whole lot to absorb, and I am looking for some direction on which approach would accomplish what I want.
I am open to just about any approach. Any answers are appreciated!
Code:
class Program
{
static void Main(string[] args)
{
//Represents what the customer selected
List<string> CustomerSelectedOptions = new List<string> { "PG", "PA", "PM", "TK", "TD" };
string LogicOperation = "PA OR (PB AND PC)"; //Evaluates true since customer list contains PA
string LogicOperation2 = "PF OR (NOT PB AND PM)"; //Evaluates true since customer list does not contain PB but contains PM
string LogicOperation3 = "NOT PG AND NOT(PL AND TZ)"; //Evaluates false since the customer does have PG selected
}
}
There are a few different approaches you can take. One common one is to replace the option checks with true or false and then use one of the built-in expression evaluators to evaluate the resulting expression. Note: XOR is not available for these.
public static class Evaluator {
static Regex wordRE = new Regex(@"[A-Z]+", RegexOptions.Compiled);
static HashSet<string> Operators = new[] { "AND", "OR", "NOT" }.ToHashSet(StringComparer.OrdinalIgnoreCase);
public static bool Evaluate(this List<string> options, string op) {
var opListOfOptions = wordRE.Matches(op).Select(m => m.Value).Where(w => !Operators.Contains(w));
foreach (var option in opListOfOptions) {
var value = options.Contains(option).ToString();
op = op.Replace(option, value);
}
//return DTEval(op) == 1;
return CompEval(op);
//return XEval(op);
}
static double DTEval(string expression) {
var dt = new DataTable();
var loDataColumn = new DataColumn("Eval", typeof(double), expression);
dt.Columns.Add(loDataColumn);
dt.Rows.Add(0);
return (double)(dt.Rows[0]["Eval"]);
}
static DataTable cDT = new DataTable();
static bool CompEval(string expression) {
return (bool)cDT.Compute(expression, "");
}
public static bool XEval(string expression) {
expression = new System.Text.RegularExpressions.Regex(@"not +(true|false)").Replace(expression.ToLower(), " not(${1}) ");
expression = new System.Text.RegularExpressions.Regex(@"(true|false)").Replace(expression, " ${1}() ");
return (bool)new System.Xml.XPath.XPathDocument(new System.IO.StringReader("<r/>")).CreateNavigator()
.Evaluate(String.Format("boolean({0})", expression));
}
}
The Evaluate method comments show the different options you could use.
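For example, with the option list from the question, calling the extension method above would look like this (expected results as stated in the question's comments):
var options = new List<string> { "PG", "PA", "PM", "TK", "TD" };
bool r1 = options.Evaluate("PA OR (PB AND PC)");         // true
bool r2 = options.Evaluate("PF OR (NOT PB AND PM)");     // true
bool r3 = options.Evaluate("NOT PG AND NOT(PL AND TZ)"); // false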
Alternatively, you could write your own expression evaluator. A simple one is a recursive descent evaluator that parses a simple grammar. This one follows C#-style precedence (NOT, then AND, then XOR, then OR), with the binary operators binding left to right.
public class Evaluator {
// recursive descent boolean expression evaluator
// grammar:
// primary = id
// primary = ( expr )
// unop = primary
// unop = not unop
// andop = unop [ and unop ]*
// xorop = andop [ xor andop ]*
// orop = xorop [ or xorop ]*
// expr = orop
public class TokenList {
List<string> tokens;
int curTokenNum;
static Regex tokenRE = new Regex(@"\w+|[()]", RegexOptions.Compiled);
public TokenList(string expr) {
curTokenNum = 0;
tokens = tokenRE.Matches(expr).Select(m => m.Value).ToList();
}
public string CurToken => curTokenNum < tokens.Count ? tokens[curTokenNum] : String.Empty;
public void MoveNext() => ++curTokenNum;
public bool MoreTokens => curTokenNum < tokens.Count;
public void NextToken() {
MoveNext();
if (!MoreTokens)
throw new InvalidExpressionException("Expected token");
}
}
static List<string> OperatorStrings = new[] { "AND", "OR", "XOR", "NOT" }.ToList();
enum Operators { and, or, xor, not };
static List<string> ParenStrings = new[] { "(", ")" }.ToList();
enum Parens { open, close };
TokenList tokens;
List<string> trueOptions;
public Evaluator(List<string> trueOptions) {
this.trueOptions = trueOptions;
}
string curToken => tokens.CurToken;
bool curTokenValue => trueOptions.Contains(curToken);
bool isOperator => OperatorStrings.FindIndex(s => s.Equals(curToken, StringComparison.OrdinalIgnoreCase)) != -1;
Operators curOp => (Operators)OperatorStrings.FindIndex(s => s.Equals(curToken, StringComparison.OrdinalIgnoreCase));
bool isParen => ParenStrings.Contains(curToken);
Parens curParen => (Parens)(ParenStrings.IndexOf(curToken));
public bool id() {
if (isOperator)
throw new InvalidExpressionException("missing operand");
else {
var ans = curTokenValue;
tokens.MoveNext();
return ans;
}
}
bool primary() {
if (isParen)
if (curParen == Parens.open) {
tokens.NextToken();
var ans = expr();
if (!isParen || curParen != Parens.close)
throw new InvalidExpressionException($"missing ) at {curToken}");
else
tokens.MoveNext();
return ans;
}
else
throw new InvalidExpressionException("Invalid )");
else
return id();
}
bool unop() {
if (isOperator && curOp == Operators.not) {
tokens.NextToken();
return !unop();
}
else
return primary();
}
bool andop() {
var ans = unop();
while (tokens.MoreTokens && isOperator && curOp == Operators.and) {
tokens.NextToken();
ans = ans & unop();
}
return ans;
}
bool xorop() {
var ans = andop();
while (tokens.MoreTokens && isOperator && curOp == Operators.xor) {
tokens.NextToken();
ans = ans ^ andop();
}
return ans;
}
bool orop() {
var ans = xorop();
while (tokens.MoreTokens && isOperator && curOp == Operators.or) {
tokens.NextToken();
ans = ans | xorop();
}
return ans;
}
bool expr() => orop();
public bool Value(string exp) {
tokens = new TokenList(exp);
var ans = expr();
if (tokens.MoreTokens)
throw new InvalidExpressionException($"Unrecognized token {curToken} after expression");
return ans;
}
}
You can call it by creating an Evaluator and passing it an expression to evaluate:
var eval = new Evaluator(CustomerSelectedOptions);
var ans = eval.Value(LogicOperation);
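Unlike the DataTable-based approach, this evaluator also understands XOR. A quick check, using the same option list as the question:
var eval2 = new Evaluator(new List<string> { "PG", "PA", "PM", "TK", "TD" });
bool both = eval2.Value("PA XOR PM"); // false: both options are selected
bool one  = eval2.Value("PA XOR PB"); // true: exactly one option is selected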

Lucene DuplicateFilter problems

I'm using Lucene to search over this table of objects:
name | category
============|==========
John Smith | Dogs
John Smith | Cats
I'm using DuplicateFilter to get only one result for each person.
My problem is that when I search for the term "John Smith Dogs" with the DuplicateFilter, I get no results. Is there an easy solution to this problem?
I came across this issue recently: with multiple segments, duplicates are not removed across segment boundaries. I wrote a little hack in my own custom DuplicateFilter class that crosses the boundaries the first time it is called and returns the cached bit sets on subsequent calls. I only implemented the CorrectBits method (used for PM_FULL_VALIDATION), but it should be trivial to implement the FastBits one (used for PM_FAST_INVALIDATION) as well, based on the original code.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Index;
using Lucene.Net.Util;
namespace Lucene.Net.Search
{
public class DuplicateFilter : Filter
{
public static int KM_USE_FIRST_OCCURRENCE = 1;
public static int KM_USE_LAST_OCCURRENCE = 2;
public static int PM_FAST_INVALIDATION = 2;
public static int PM_FULL_VALIDATION = 1;
private string fieldName;
/*
* KeepMode determines which document id to consider as the master, all others being
* identified as duplicates. Selecting the "first occurrence" can potentially save on IO.
*/
private int keepMode = KM_USE_FIRST_OCCURRENCE;
/*
* "Full" processing mode starts by setting all bits to false and only setting bits
* for documents that contain the given field and are identified as none-duplicates.
* "Fast" processing sets all bits to true then unsets all duplicate docs found for the
* given field. This approach avoids the need to read TermDocs for terms that are seen
* to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
* faster approach , the downside is that bitsets produced will include bits set for
* documents that do not actually contain the field given.
*
*/
private int processingMode = PM_FULL_VALIDATION;
private object SetupLock = new object();
public DuplicateFilter(string fieldName) : this(fieldName, KM_USE_LAST_OCCURRENCE, PM_FULL_VALIDATION)
{
}
public DuplicateFilter(string fieldName, int keepMode, int processingMode)
{
this.fieldName = fieldName;
this.keepMode = keepMode;
this.processingMode = processingMode;
}
public string FieldName
{
get => fieldName;
set => fieldName = value;
}
public int KeepMode
{
get => keepMode;
set => keepMode = value;
}
public int ProcessingMode
{
get => processingMode;
set => processingMode = value;
}
private IDictionary<string, (OpenBitSet Filtered, OpenBitSet Hit)> SegmentBits
{
get;
set;
} = new Dictionary<string, (OpenBitSet Filtered, OpenBitSet Hit)>();
private bool SetupComplete
{
get;
set;
} = false;
public override bool Equals(object obj)
{
if (this == obj) {
return true;
}
if ((obj == null) || (obj.GetType() != GetType())) {
return false;
}
var other = (DuplicateFilter)obj;
return keepMode == other.keepMode &&
processingMode == other.processingMode &&
(fieldName == other.fieldName || (fieldName != null && fieldName.Equals(other.fieldName)));
}
public override DocIdSet GetDocIdSet(IndexReader reader)
{
if (processingMode == PM_FAST_INVALIDATION) {
return FastBits(reader);
} else {
return CorrectBits(reader);
}
}
public override int GetHashCode()
{
var hash = 217;
hash = 31 * hash + keepMode;
hash = 31 * hash + processingMode;
hash = 31 * hash + fieldName.GetHashCode();
return hash;
}
private OpenBitSet CorrectBits(IndexReader reader)
{
SetupCorrect(reader);
return SegmentBits[((SegmentReader)reader).GetSegmentName()].Filtered;
}
private OpenBitSet FastBits(IndexReader reader)
{
throw new NotImplementedException();
}
private void SetupCorrect(IndexReader reader)
{
lock (SetupLock) {
if (!SetupComplete) {
// Get segment readers and bitsets.
var segmentReaders = new Dictionary<string, SegmentReader>();
var sis = new SegmentInfos();
sis.Read(reader.Directory());
foreach (SegmentInfo si in sis) {
var r = SegmentReader.Get(true, si, 1);
segmentReaders.Add(si.name, r);
SegmentBits.Add(si.name, (new OpenBitSet(r.MaxDoc()), new OpenBitSet(r.MaxDoc())));
}
// Determine duplicates across segments.
foreach (var outerKvp in segmentReaders) {
var startTerm = new Term(fieldName);
var te = outerKvp.Value.Terms(startTerm);
if (te != null) {
var currTerm = te.Term();
while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) {
var set = false;
var lastKey = outerKvp.Key;
var lastDoc = -1;
foreach (var innerKvp in segmentReaders) {
var td = innerKvp.Value.TermDocs(currTerm);
if (td.Next()) {
var doc = td.Doc();
if (SegmentBits[innerKvp.Key].Hit.Get(doc)) {
// Term has already been hit; skip it.
set = true;
continue;
}
// Keep track of which terms have already been hit.
SegmentBits[innerKvp.Key].Hit.Set(doc);
if (set) {
// Only need to set the first term as hit in each segment.
continue;
} else if (keepMode == KM_USE_FIRST_OCCURRENCE) {
SegmentBits[innerKvp.Key].Filtered.Set(doc);
set = true;
} else {
do {
lastDoc = td.Doc();
lastKey = innerKvp.Key;
} while (td.Next());
}
}
}
if (!set && keepMode == KM_USE_LAST_OCCURRENCE) {
SegmentBits[lastKey].Filtered.Set(lastDoc);
}
if (!te.Next()) {
break;
}
currTerm = te.Term();
}
}
}
// Mark setup as complete.
SetupComplete = true;
}
}
}
}
}
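Usage is the same as with the stock filter. A minimal sketch, assuming Lucene.Net 3.x, an already-open IndexSearcher named searcher and an already-built query:
// Keep only one hit per distinct value of the "name" field.
Filter duplicateFilter = new DuplicateFilter("name");
TopDocs hits = searcher.Search(query, duplicateFilter, 10);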

Bin Tree Post Order Traversal, No recursion, no node flag

Is there another way to do this? I just spent 2 hours trying to figure it out. I have a solution (see DumpPostOrder below); however, is there a better or more efficient method? It feels like there may be. The rules are: no recursion, and the nodes cannot have a visited flag, i.e. you can only use the Left and Right members.
My approach was to destroy the tree in the process. By setting each child to null you can mark the node as traversed, but I'm also looking at each node with children twice :(. Is there a better, faster way? (Comments on my preorder and inorder implementations are appreciated but not necessary; i.e., I will vote, but not mark the answer.) Thanks!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BinaryTreeNoRecursion
{
public class TreeNode<T>
{
public T Value { get; set; }
public TreeNode<T> Left { get; set; }
public TreeNode<T> Right { get; set; }
public TreeNode(T inValue)
{
Value = inValue;
}
public TreeNode(TreeNode<T> left, TreeNode<T> right, T inValue)
{
Left = left;
Right = right;
Value = inValue;
}
}
public class BinaryTree<T>
{
private TreeNode<T> root;
public TreeNode<T> Root
{
get { return root; }
}
public BinaryTree(TreeNode<T> inRoot)
{
root = inRoot;
}
public void DumpPreOrder(T[] testme)
{
Stack<TreeNode<T>> stack = new Stack<TreeNode<T>>();
stack.Push(root);
int count =0;
while (true)
{
if (stack.Count == 0) break;
TreeNode<T> temp = stack.Pop();
if (!testme[count].Equals(temp.Value)) throw new Exception("fail");
if (temp.Right != null)
{
stack.Push(temp.Right);
}
if (temp.Left != null)
{
stack.Push(temp.Left);
}
count++;
}
}
public void DumpPostOrder(T[] testme)
{
Stack<TreeNode<T>> stack = new Stack<TreeNode<T>>();
TreeNode<T> node = root;
TreeNode<T> temp;
int count = 0;
while(node!=null || stack.Count!=0)
{
if (node!=null)
{
if (node.Left!=null)
{
temp = node;
node = node.Left;
temp.Left = null;
stack.Push(temp);
}
else
if (node.Right !=null)
{
temp = node;
node = node.Right;
temp.Right= null;
stack.Push(temp);
}
else //if the children are null
{
if (!testme[count].Equals(node.Value)) throw new Exception("fail");
count++;
if (stack.Count != 0)
{
node = stack.Pop();
}
else
{
node = null;
}
}
}
}
}
public void DumpInOrder(T[] testme)
{
Stack<TreeNode<T>> stack = new Stack<TreeNode<T>>();
TreeNode<T> temp = root;
int count = 0;
while (stack.Count!=0 || temp!=null)
{
if (temp != null)
{
stack.Push(temp);
temp = temp.Left;
}
else
{
temp = stack.Pop();
if (!testme[count].Equals(temp.Value)) throw new Exception("fail");
count++;
temp = temp.Right;
}
}
}
}
class Program
{
static void Main(string[] args)
{
//create a simple tree
TreeNode<int> node = new TreeNode<int>(100);
node.Left = new TreeNode<int>(50);
node.Right = new TreeNode<int>(150);
node.Left.Left = new TreeNode<int>(25);
node.Left.Right = new TreeNode<int>(75);
node.Right.Left = new TreeNode<int>(125);
node.Right.Right = new TreeNode<int>(175);
node.Right.Left.Left = new TreeNode<int>(110);
int[] preOrderResult = { 100, 50, 25, 75, 150, 125, 110, 175};
int[] inOrderResult = { 25, 50, 75, 100, 110, 125, 150, 175};
int[] postOrderResult = { 25, 75, 50, 110, 125, 175, 150, 100 };
BinaryTree<int> binTree = new BinaryTree<int>(node);
//do the dumps, verify output
binTree.DumpPreOrder(preOrderResult);
binTree.DumpInOrder(inOrderResult);
binTree.DumpPostOrder(postOrderResult);
}
}
}
Seems to me that destroying the tree while traversing it is pretty brutal.
You are currently building a Collection of nodes visited.
You are marking nodes as visited by setting them to null.
Could you not instead check for visitation by checking for the node in your Collection? For efficiency you may need to not use a Stack, but that's an implementation detail.
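A minimal sketch of that suggestion: keep the traversal stack, but record the nodes whose children have already been pushed in a HashSet instead of mutating the tree (the PostOrder name and the IEnumerable<T> shape are mine; HashSet uses reference equality for the nodes by default):
// Post-order without recursion, without a visited flag on the node itself,
// and without destroying the tree. Uses the TreeNode<T> from the question.
public static IEnumerable<T> PostOrder<T>(TreeNode<T> root)
{
    var stack = new Stack<TreeNode<T>>();
    var expanded = new HashSet<TreeNode<T>>(); // nodes whose children were already pushed
    if (root != null) stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Peek();
        if (expanded.Contains(node))
        {
            stack.Pop();
            yield return node.Value; // both subtrees done: emit the node
        }
        else
        {
            expanded.Add(node);
            if (node.Right != null) stack.Push(node.Right);
            if (node.Left != null) stack.Push(node.Left);
        }
    }
}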
You could map your binary tree to an array (similar to how you can map a heap to an array, as shown here), and do your post-order traversal there. The action of converting a binary tree to an array is probably going to utilize recursion, but if you're controlling how the tree is initially constructed (or if you're just looking for an intriguing thought), you could just construct it as an array, and trivialize your non-recursive post-order traversal (with no flags) problem.
Edit
I think this would be a viable option:
1) Keep a bi-directional linked list of pointers to nodes in the tree.
2) Start at the root node.
3) Append root pointer to list.
4) Go to right child.
5) Append current node pointer to list.
6) Repeat steps 4 and 5 until there doesn't exist a right child.
7) Write current node to post-order-traversal.
8) Set current node to last node in the list.
9) Go to left child.
10) Append current node pointer to list.
11) Repeat steps 4 through 10 until the list is empty.
Basically, this makes all of the nodes in the tree have a pointer to their parent.
Avoiding recursion in this case is probably a bad idea, as previously noted. The system call stack is designed to handle things like this, and destroying your tree is a form of marking nodes.
If you want to use your own stack, then you need to push a bit more information than just the node. Remember that the system call stack contains the program counter as well as the function parameters (and local variables, but that is not important here). We could push tuples of the form (PushMyChildren, node) and (PrintMe, node). When we pop a tuple of the form (PushMyChildren, node), we push (PrintMe, node), then (PushMyChildren, right child), and then (PushMyChildren, left child); if the left or right child doesn't exist, we don't push it. When we pop a tuple of the form (PrintMe, node), we print the node. In (pseudo) C# (I don't know C# and don't have time to look up the correct types and syntax):
enum StackState { PrintNode, PushChildren }

public void DumpPostOrder(T[] testme)
{
    var stack = new Stack<Tuple<StackState, TreeNode<T>>>();
    stack.Push(Tuple.Create(StackState.PushChildren, root));
    while (stack.Count != 0)
    {
        var curr = stack.Pop();
        if (curr.Item1 == StackState.PrintNode)
        {
            // process the node in curr.Item2
        }
        else
        {
            var node = curr.Item2;
            stack.Push(Tuple.Create(StackState.PrintNode, node));
            if (node.Right != null)
                stack.Push(Tuple.Create(StackState.PushChildren, node.Right));
            if (node.Left != null)
                stack.Push(Tuple.Create(StackState.PushChildren, node.Left));
        }
    }
}
I just made a post-order iterator in Java using breadth-first traversal (with a queue).
private void init(){
if (initialized) return;
stack = new Stack<>();
stack.push(root);
travers(root.right);
travers(root.left);
initialized = true;
}
private void travers(Node node){
if (node == null) return;
Queue<Node> queue = new LinkedList<>();
queue.add(node);
while (!queue.isEmpty()){
Node temp = queue.poll();
stack.push(temp);
if (temp.right != null) queue.add(temp.right);
if (temp.left != null) queue.add(temp.left);
}
}
public T next() {
return stack.pop().data;
}
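For reference, a standard C# alternative in the same spirit (collect nodes, then emit them in reverse): do an iterative root-right-left traversal with one stack and reverse the output, which yields left-right-root, i.e. post-order. This is a sketch using the TreeNode<T> class from the question, not a translation of the Java code above:
public static List<T> PostOrderByReversal<T>(TreeNode<T> root)
{
    var result = new List<T>();
    if (root == null) return result;
    var stack = new Stack<TreeNode<T>>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        result.Add(node.Value);                      // visited in root-right-left order
        if (node.Left != null) stack.Push(node.Left);
        if (node.Right != null) stack.Push(node.Right);
    }
    result.Reverse();                                // left-right-root = post-order
    return result;
}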

Decorate-Sort-Undecorate, how to sort an alphabetic field in descending order

I've got a large set of data for which computing the sort key is fairly expensive. What I'd like to do is use the DSU pattern where I take the rows and compute a sort key. An example:
Qty Name Supplier
Row 1: 50 Widgets IBM
Row 2: 48 Thingies Dell
Row 3: 99 Googaws IBM
To sort by Quantity and Supplier I could have the sort keys: 0050 IBM, 0048 Dell, 0099 IBM. The numbers are right-aligned and the text is left-aligned, everything is padded as needed.
If I need to sort by the Quantity in descending order, I can just subtract the value from a constant (say, 10000) to build the sort keys: 9950 IBM, 9952 Dell, 9901 IBM.
How do I quickly/cheaply build a descending key for the alphabetic fields in C#?
[My data is all 8-bit ASCII w/ISO 8859 extension characters.]
Note: In Perl, this could be done by bit-complementing the strings:
$subkey = $string ^ ( "\xFF" x length $string );
Porting this solution straight into C# doesn't work:
subkey = encoding.GetString(encoding.GetBytes(stringval).
Select(x => (byte)(x ^ 0xff)).ToArray());
I suspect because of the differences in the way that strings are handled in C#/Perl. Maybe Perl is sorting in ASCII order and C# is trying to be smart?
Here's a sample piece of code that tries to accomplish this:
System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();
List<List<string>> sample = new List<List<string>>() {
new List<string>() { "", "apple", "table" },
new List<string>() { "", "apple", "chair" },
new List<string>() { "", "apple", "davenport" },
new List<string>() { "", "orange", "sofa" },
new List<string>() { "", "peach", "bed" },
};
foreach(List<string> line in sample)
{
StringBuilder sb = new StringBuilder();
string key1 = line[1].PadRight(10, ' ');
string key2 = line[2].PadRight(10, ' ');
// Comment the next line to sort desc, desc
key2 = encoding.GetString(encoding.GetBytes(key2).
Select(x => (byte)(x ^ 0xff)).ToArray());
sb.Append(key2);
sb.Append(key1);
line[0] = sb.ToString();
}
List<List<string>> output = sample.OrderBy(p => p[0]).ToList();
return;
You can get to where you want, although I'll admit I don't know whether there's a better overall way.
The problem you have with the straight translation of the Perl method is that .NET simply will not allow you to be so laissez-faire with encoding. However, if, as you say, your data is all printable ASCII (i.e. consists of characters with Unicode codepoints in the range 32..127) - note that there is no such thing as '8-bit ASCII' - then you can do this:
key2 = encoding.GetString(encoding.GetBytes(key2).
Select(x => (byte)(32+95-(x-32))).ToArray());
In this expression I have been explicit about what I'm doing:
Take x (which I assume to be in 32..127)
Map the range to 0..95 to make it zero-based
Reverse by subtracting from 95
Add 32 to map back to the printable range
It's not very nice but it does work.
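A quick sanity check of that mapping (needs using System.Linq, and an ordinal comparison afterwards, as discussed later in this question):
var encoding = new System.Text.ASCIIEncoding();
Func<string, string> invert = s => encoding.GetString(
    encoding.GetBytes(s).Select(x => (byte)(32 + 95 - (x - 32))).ToArray());

// "apple" sorts before "peach" normally; after inverting, its key is greater,
// so an ascending ordinal sort puts it last, i.e. descending alphabetic order.
bool reversed = string.CompareOrdinal(invert("apple"), invert("peach")) > 0; // true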
Just write an IComparer<T> that works as a chain of comparers.
In case of equality at each stage, it should pass evaluation to the next key part. If it's less than or greater than, just return.
You need something like this:
int comparison = 0;
for (int i = 0; i < n; i++)
{
    comparison = a[i].CompareTo(b[i]) * comparisonSign[i];
    if (comparison != 0)
        return comparison;
}
return comparison;
Or even simpler, you can go with:
list.OrderBy(i=>i.ID).ThenBy(i=>i.Name).ThenByDescending(i=>i.Supplier);
The first call returns an IOrderedEnumerable<T>, which can then be sorted by the additional fields.
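With the columns from the question (quantity descending, then supplier ascending), that would be something like the following; rows, Qty and Supplier are assumed names matching the question's table:
var sorted = rows
    .OrderByDescending(r => r.Qty)
    .ThenBy(r => r.Supplier, StringComparer.Ordinal)
    .ToList();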
Answering my own question (but not satisfactorily). To construct a descending alphabetic key I used this code and then appended this subkey to the search key for the object:
if ( reverse )
subkey = encoding.GetString(encoding.GetBytes(subkey)
.Select(x => (byte)(0x80 - x)).ToArray());
rowobj.sortKey.Append(subkey);
Once I had the keys built, I couldn't just do this:
rowobjList.Sort();
Because the default comparer doesn't sort in ASCII order (which my 0x80 - x trick relies on). So then I had to implement IComparable<RowObject> using Ordinal sorting:
public int CompareTo(RowObject other)
{
return String.Compare(this.sortKey, other.sortKey,
StringComparison.Ordinal);
}
This seems to work. I'm a little dissatisfied because it feels clunky in C# with the encoding/decoding of the string.
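Alternatively, the ordinal comparison can be handed to the sort call directly instead of implementing IComparable<RowObject>. A sketch, assuming the computed key is exposed as a string property named SortKey:
rowobjList.Sort((a, b) => string.CompareOrdinal(a.SortKey, b.SortKey));
// or, with LINQ:
var ordered = rowobjList.OrderBy(r => r.SortKey, StringComparer.Ordinal).ToList();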
If key computation is expensive, why compute a key at all? String comparison by itself is not free; it is actually an expensive loop through the characters and is not going to perform any better than a custom comparison loop.
In this test, the custom comparison sort performs about 3 times better than DSU.
Note that the DSU key computation is not measured in this test; it's precomputed.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace DSUPatternTest
{
[TestClass]
public class DSUPatternPerformanceTest
{
public class Row
{
public int Qty;
public string Name;
public string Supplier;
public string PrecomputedKey;
public void ComputeKey()
{
// Do not need StringBuilder here, String.Concat does better job internally.
PrecomputedKey =
Qty.ToString().PadLeft(4, '0') + " "
+ Name.PadRight(12, ' ') + " "
+ Supplier.PadRight(12, ' ');
}
public bool Equals(Row other)
{
if (ReferenceEquals(null, other)) return false;
if (ReferenceEquals(this, other)) return true;
return other.Qty == Qty && Equals(other.Name, Name) && Equals(other.Supplier, Supplier);
}
public override bool Equals(object obj)
{
if (ReferenceEquals(null, obj)) return false;
if (ReferenceEquals(this, obj)) return true;
if (obj.GetType() != typeof (Row)) return false;
return Equals((Row) obj);
}
public override int GetHashCode()
{
unchecked
{
int result = Qty;
result = (result*397) ^ (Name != null ? Name.GetHashCode() : 0);
result = (result*397) ^ (Supplier != null ? Supplier.GetHashCode() : 0);
return result;
}
}
}
public class RowComparer : IComparer<Row>
{
public int Compare(Row x, Row y)
{
int comparision;
comparision = x.Qty.CompareTo(y.Qty);
if (comparision != 0) return comparision;
comparision = x.Name.CompareTo(y.Name);
if (comparision != 0) return comparision;
comparision = x.Supplier.CompareTo(y.Supplier);
return comparision;
}
}
[TestMethod]
public void CustomLoopIsFaster()
{
var random = new Random();
var rows = Enumerable.Range(0, 5000).Select(i =>
new Row
{
Qty = (int) (random.NextDouble()*9999),
Name = random.Next().ToString(),
Supplier = random.Next().ToString()
}).ToList();
foreach (var row in rows)
{
row.ComputeKey();
}
var dsuSw = Stopwatch.StartNew();
var sortedByDSU = rows.OrderBy(i => i.PrecomputedKey).ToList();
var dsuTime = dsuSw.ElapsedMilliseconds;
var customSw = Stopwatch.StartNew();
var sortedByCustom = rows.OrderBy(i => i, new RowComparer()).ToList();
var customTime = customSw.ElapsedMilliseconds;
Trace.WriteLine(dsuTime);
Trace.WriteLine(customTime);
CollectionAssert.AreEqual(sortedByDSU, sortedByCustom);
Assert.IsTrue(dsuTime > customTime * 2.5);
}
}
}
If you need to build a sorter dynamically you can use something like this:
var comparerChain = new ComparerChain<Row>()
.By(r => r.Qty, false)
.By(r => r.Name, false)
.By(r => r.Supplier, false);
var sortedByCustom = rows.OrderBy(i => i, comparerChain).ToList();
Here is a sample implementation of comparer chain builder:
public class ComparerChain<T> : IComparer<T>
{
private List<PropComparer<T>> Comparers = new List<PropComparer<T>>();
public int Compare(T x, T y)
{
foreach (var comparer in Comparers)
{
var result = comparer._f(x, y);
if (result != 0)
return result;
}
return 0;
}
public ComparerChain<T> By<Tp>(Func<T,Tp> property, bool descending) where Tp:IComparable<Tp>
{
Comparers.Add(PropComparer<T>.By(property, descending));
return this;
}
}
public class PropComparer<T>
{
public Func<T, T, int> _f;
public static PropComparer<T> By<Tp>(Func<T,Tp> property, bool descending) where Tp:IComparable<Tp>
{
Func<T, T, int> ascendingCompare = (a, b) => property(a).CompareTo(property(b));
Func<T, T, int> descendingCompare = (a, b) => property(b).CompareTo(property(a));
return new PropComparer<T>(descending ? descendingCompare : ascendingCompare);
}
public PropComparer(Func<T, T, int> f)
{
_f = f;
}
}
It works a little bit more slowly, maybe because of the delegate calls used to read the properties.
