[jboss-svn-commits] JBL Code SVN: r19371 - in labs/jbossrules/contrib/machinelearning/decisiontree/src: dt/builder and 3 other directories.

jboss-svn-commits at lists.jboss.org jboss-svn-commits at lists.jboss.org
Tue Apr 1 19:53:50 EDT 2008


Author: gizil
Date: 2008-04-01 19:53:50 -0400 (Tue, 01 Apr 2008)
New Revision: 19371

Added:
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java
Modified:
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java
   labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java
Log:
before recursive discretization

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -6,6 +6,7 @@
 
 import dt.memory.Domain;
 import dt.memory.Fact;
+import dt.memory.FactTargetDistribution;
 import dt.tools.Util;
 
 public class DecisionTree {
@@ -76,6 +77,15 @@
 		// *OPT* }
 		return facts_in_class;
 	}
+	
+	// *OPT* public double getInformation(List<FactSet> facts) {
+	public FactTargetDistribution getDistribution(List<Fact> facts) {
+		
+		FactTargetDistribution facts_in_class = new FactTargetDistribution(getDomain(getTarget()));
+		facts_in_class.calculateDistribution(facts);
+		FACTS_READ += facts.size();
+		return facts_in_class;
+	}
 
 	// *OPT* public double getInformation(List<FactSet> facts) {
 	/**

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -8,10 +8,12 @@
 	//represents leaf nodes with the target value
 	private Object targetValue;
 	private double rank;
+	private int num_facts_classified;
 
 	public LeafNode(Domain<?> targetDomain, Object value){
 		super(targetDomain);
 		this.targetValue = value;
+		num_facts_classified = 0;
 	}
 	
 	public void addNode(Object attributeValue, TreeNode node) {
@@ -43,4 +45,13 @@
 		buf.append("DECISION -> " +targetValue.toString()+"\n");
 		return buf.toString();
 	}
+
+	public void setNumSupporter(int size) {
+		this.num_facts_classified= size;
+		
+	}
+	
+	public int getNum_facts_classified() {
+		return this.num_facts_classified;
+	}
 }

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -11,6 +11,7 @@
 import dt.LeafNode;
 import dt.TreeNode;
 
+import dt.memory.FactTargetDistribution;
 import dt.memory.WorkingMemory;
 import dt.memory.Fact;
 import dt.memory.FactSet;
@@ -39,8 +40,9 @@
 
 	MyThread helper;
 	private int FUNC_CALL = 0;
-	private int num_fact_processed = 0;
-
+	protected int num_fact_processed = 0;
+	private ArrayList<Fact> unclassified_facts;
+	
 	/*
 	 * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
 	 * 
@@ -50,9 +52,18 @@
 	 * internalprocess(attributestoprocess)
 	 */
 
+	public int getNum_fact_processed() {
+		return num_fact_processed;
+	}
+
+	public void setNum_fact_processed(int num_fact_processed) {
+		this.num_fact_processed = num_fact_processed;
+	}
+
 	public DecisionTree build(WorkingMemory wm, Class<?> klass,
-			String targetField, Collection<String> workingAttributes) {
-
+			String targetField, List<String> workingAttributes) {
+		
+		unclassified_facts = new ArrayList<Fact>();
 		DecisionTree dt = new DecisionTree(klass.getName());
 		// **OPT List<FactSet> facts = new ArrayList<FactSet>();
 		ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -94,8 +105,8 @@
 	}
 
 	public DecisionTree build(WorkingMemory wm, String klass,
-			String targetField, Collection<String> workingAttributes) {
-
+			String targetField, List<String> workingAttributes) {
+		unclassified_facts = new ArrayList<Fact>();
 		DecisionTree dt = new DecisionTree(klass);
 		// **OPT List<FactSet> facts = new ArrayList<FactSet>();
 		ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -116,7 +127,7 @@
 
 		if (workingAttributes != null)
 			for (String attr : workingAttributes) {
-				System.out.println("Bok degil " + attr);
+				//System.out.println("Bok degil " + attr);
 				dt.addDomain(klass_fs.getDomain(attr));
 			}
 		else
@@ -143,8 +154,27 @@
 		}
 		/* let's get the statistics of the results */
 		// List<?> targetValues = dt.getPossibleValues(dt.getTarget());
-		Hashtable<Object, Integer> stats = dt.getStatistics(facts, dt
-				.getTarget());// targetValues
+		//Hashtable<Object, Integer> stats_ = dt.getStatistics(facts, dt.getTarget());// targetValues
+		
+		//FactTargetDistribution stats = dt.getDistribution(facts);
+		
+		FactTargetDistribution stats = new FactTargetDistribution(dt.getDomain(dt.getTarget()));
+		stats.calculateDistribution(facts);
+	
+		stats.evaluateMajority();
+//		
+//		Object winner1 = stats.getThe_winner_target_class();
+//		for (Object looser: stats.getTargetClasses()) {
+//			System.out.println(" the target class = "+ looser);
+//			if (!winner1.equals(looser) && stats.getVoteFor(looser)>0) {
+//				System.out.println(" the num of supporters = "+ stats.getVoteFor(looser));
+//				System.out.println(" but the guys "+ stats.getSupportersFor(looser));
+//				System.out.println("How many bok: "+stats.getSupportersFor(looser).size());
+//				//unclassified_facts.addAll(stats.getSupportersFor(looser));
+//			} else
+//				System.out.println(Util.ntimes("DANIEL", 5)+ "how many times not matching?? not a looser "+ looser );
+//		}
+		/*
 		Collection<Object> targetValues = stats.keySet();
 		int winner_vote = 0;
 		int num_supporters = 0;
@@ -159,24 +189,29 @@
 				winner = key;
 			}
 		}
+		*
 
 		/* if all elements are classified to the same value */
-		if (num_supporters == 1) {
-			// *OPT* return new
-			// LeafNode(facts.get(0).getFact(0).getFieldValue(target));
-			LeafNode classifiedNode = new LeafNode(
-					dt.getDomain(dt.getTarget()), winner);
+		if (stats.getNum_supported_target_classes() == 1) {
+
+			LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), stats.getThe_winner_target_class());
 			classifiedNode.setRank((double) facts.size()/(double) num_fact_processed);
+			classifiedNode.setNumSupporter(facts.size());
+			
 			return classifiedNode;
 		}
 
 		/* if there is no attribute left in order to continue */
 		if (attributeNames.size() == 0) {
 			/* an heuristic of the leaf classification */
-			LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt
-					.getTarget()), winner);
-			noAttributeLeftNode.setRank((double) winner_vote
-					/ (double) num_fact_processed);
+			Object winner = stats.getThe_winner_target_class();
+			LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+			noAttributeLeftNode.setRank((double) stats.getVoteFor(winner)/ (double) num_fact_processed);
+			noAttributeLeftNode.setNumSupporter(stats.getVoteFor(winner));
+			
+			/* we need to know how many guys cannot be classified and who these guys are */
+			FactProcessor.splitUnclassifiedFacts(unclassified_facts, stats);
+			
 			return noAttributeLeftNode;
 		}
 
@@ -190,6 +225,12 @@
 			
 		Hashtable<Object, List<Fact>> filtered_facts = FactProcessor.splitFacts(facts, choosenDomain);
 
+		for (Object value : filtered_facts.keySet()) {
+			if (filtered_facts.get(value).isEmpty()){
+				@SuppressWarnings("unused")
+				boolean bok = true;
+			}
+		}
 		dt.FACTS_READ += facts.size();
 
 		for (Object value : filtered_facts.keySet()) {
@@ -201,8 +242,9 @@
 
 			if (filtered_facts.get(value).isEmpty()) {
 				/* majority !!!! */
-				LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
-				majorityNode.setRank(0.0);
+				LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), stats.getThe_winner_target_class());
+				majorityNode.setRank(-1.0); // classifying nothing
+				majorityNode.setNumSupporter(filtered_facts.get(value).size());
 				currentNode.addNode(value, majorityNode);
 			} else {
 				TreeNode newNode = c45(dt, filtered_facts.get(value), attributeNames_copy);

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -1,6 +1,6 @@
 package dt.builder;
 
-import java.util.Collection;
+import java.util.List;
 
 import dt.DecisionTree;
 import dt.memory.WorkingMemory;
@@ -8,8 +8,8 @@
 public interface DecisionTreeBuilder {
 	
 	
-	DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes);
+	DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, List<String> workingAttributes);
 
-	DecisionTree build(WorkingMemory simple, String klass_name, String target_attr,Collection<String> workingAttributes);
+	DecisionTree build(WorkingMemory simple, String klass_name, String target_attr,List<String> workingAttributes);
 
 }

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -39,6 +39,8 @@
 	MyThread helper;
 	private int FUNC_CALL = 0;
 	private int num_fact_processed = 0;
+	
+	private List<Fact> unclassified_facts;
 
 	/* 
 	 * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
@@ -52,6 +54,7 @@
 
 	public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
 
+		unclassified_facts = new ArrayList<Fact>();
 		DecisionTree dt = new DecisionTree(klass.getName());
 //		**OPT		List<FactSet> facts = new ArrayList<FactSet>();
 		ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -106,6 +109,7 @@
 
 	public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
 
+		unclassified_facts = new ArrayList<Fact>();
 		DecisionTree dt = new DecisionTree(klass);
 //		**OPT		List<FactSet> facts = new ArrayList<FactSet>();
 		ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -215,6 +219,7 @@
 			//*OPT*			return new LeafNode(facts.get(0).getFact(0).getFieldValue(target));
 			LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
 			classifiedNode.setRank((double)facts.size()/(double)num_fact_processed);
+			classifiedNode.setNumSupporter(facts.size());
 			return classifiedNode;
 		}
 
@@ -223,6 +228,7 @@
 			/* an heuristic of the leaf classification*/
 			LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
 			noAttributeLeftNode.setRank((double)winner_vote/(double)num_fact_processed);
+			noAttributeLeftNode.setNumSupporter(winner_vote);
 			return noAttributeLeftNode;
 		}
 

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -12,15 +12,15 @@
 import dt.memory.Domain;
 import dt.memory.Fact;
 import dt.memory.FactDistribution;
+import dt.memory.FactTargetDistribution;
 import dt.tools.Util;
-import dt.memory.NumericDomain;
 
 public class Entropy implements InformationMeasure {
 	
 	public static Domain<?> chooseContAttribute(DecisionTree dt, List<Fact> facts,
-			Hashtable<Object, Integer> facts_in_class, List<String> attrs) {
+			FactTargetDistribution facts_in_class, List<String> attrs) {
 
-		double dt_info = calc_info(facts_in_class, facts.size());
+		double dt_info = calc_info(facts_in_class);
 		double greatestGain = -100000.0;
 		String attributeWithGreatestGain = attrs.get(0);
 		Domain attrDomain = dt.getDomain(attributeWithGreatestGain);
@@ -43,14 +43,14 @@
 				
 				attrDomain = dt.getDomain(attr).clone();
 				attrDomain.addPseudoValue(facts.get(facts.size()-1).getFieldValue(attr));
-				System.out.println("entropy.chooseContAttribute(1)*********** num of split for "+
-						attr+": "+ attrDomain.getValues().size()+ " ("+ attrDomain.getValues().get(0)+")");				
+//				System.out.println("entropy.chooseContAttribute(1)*********** num of split for "+
+//						attr+": "+ attrDomain.getValues().size()+ " ("+ attrDomain.getValues().get(0)+")");				
 				split_indices = new ArrayList<Integer>();
-				System.out.println("entropy.chooseContAttribute(BOK) size "+split_indices.size());
+				//System.out.println("entropy.chooseContAttribute(BOK) size "+split_indices.size());
 				gain = dt_info - info_contattr(facts, attrDomain, targetDomain, 
 												facts_in_class, split_indices, splits);
-				System.out.println("entropy.chooseContAttribute(2)*********** num of split for "+
-						attr+": "+ attrDomain.getValues().size());				
+//				System.out.println("entropy.chooseContAttribute(2)*********** num of split for "+
+//						attr+": "+ attrDomain.getValues().size());				
 			}
 			
 			if (gain > greatestGain) {				
@@ -64,7 +64,242 @@
 
 		return bestDomain;
 	}
+	public static double info_contattr(List<Fact> facts,
+			Domain splitDomain, Domain<?> targetDomain, 
+			FactTargetDistribution facts_in_class, 
+			List<Integer> split_indices,
+			List<Fact> split_facts) {
 	
+		String splitAttr = splitDomain.getName();
+		List<?> splitValues = splitDomain.getValues();
+		String targetAttr = targetDomain.getName();
+		List<?> targetValues = targetDomain.getValues();
+		if (Util.DEBUG) {
+			System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
+			int f_i=0;
+			for(Fact f: facts) {
+				System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
+				f_i++;
+			}
+		}
+
+		if (facts.size() <= 1) {
+			System.out
+					.println("The size of the fact list is 0 oups??? exiting....");
+			System.exit(0);
+		}
+		if (split_facts.size() < 1) {
+			System.out
+					.println("The size of the splits is 0 oups??? exiting....");
+			System.exit(0);
+		}
+		
+		/* initialize the distribution */
+		Object key0 = Integer.valueOf(0);
+		Object key1 = Integer.valueOf(1);
+		List<Object> keys = new ArrayList<Object>(2);
+		keys.add(key0);
+		keys.add(key1);
+		
+		
+		FactDistribution facts_at_attribute = new FactDistribution(keys, targetValues);
+		facts_at_attribute.setTotal(facts.size());
+		facts_at_attribute.setTargetDistForAttr(key1, facts_in_class);
+		facts_at_attribute.setSumForAttr(key1, facts.size());
+		
+		double best_sum = -100000.0;
+		Object value_to_split = splitValues.get(0);
+		int split_index =1, index = 1;
+		Iterator<Fact> f_ite = facts.iterator();
+		Fact f1 = f_ite.next();
+		Comparator<Fact> targetComp = f1.getDomain(targetAttr).factComparator();
+		if (Util.DEBUG)	System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
+		while (f_ite.hasNext()) {/* 2. Look for potential cut-points. */
+
+			Fact f2 = f_ite.next();
+			if (Util.DEBUG) System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
+			Object targetKey = f2.getFieldValue(targetAttr);
+			
+			// System.out.println("My key: "+ targetKey.toString());
+			//for (Object attr_key : attr_values)
+			
+			/* every time it change the place in the distribution */
+			facts_at_attribute.change(key0, targetKey, +1);
+			facts_at_attribute.change(key1, targetKey, -1);
+	
+			/*
+			 * 2.1 Cut points are points in the sorted list above where the class labels change. 
+			 * Eg. if I had five instances with values for the attribute of interest and labels 
+			 * (1.0,A), (1.4,A), (1.7, A), (2.0,B), (3.0, B), (7.0, A), then there are only
+			 * two cutpoints of interest: 1.85 and 5 (mid-way between the points
+			 * where the classes change from A to B or vice versa).
+			 */
+			
+			if ( targetComp.compare(f1, f2)!=0) {
+				// the cut point
+				Number cp_i = (Number) f1.getFieldValue(splitAttr);
+				Number cp_i_next = (Number) f2.getFieldValue(splitAttr);
+
+				Number cut_point = (Double)(cp_i.doubleValue() + cp_i_next.doubleValue()) / 2;
+				
+				/*
+				 * 3. Evaluate your favourite disparity measure 
+				 * (info gain, gain ratio, gini coefficient, chi-squared test) on the cut point
+				 * and calculate its gain 
+				 */
+				double sum = calc_info_attr(facts_at_attribute);
+				//System.out.println("**entropy.info_contattr() FOUND: "+ sum + " best sum "+best_sum + 
+				if (Util.DEBUG) System.out.println("  **Try "+ sum + " best sum "+best_sum + 
+				" value ("+ f1.getFieldValue(splitAttr) +"-|"+ value_to_split+"|-"+ f2.getFieldValue(splitAttr)+")");
+				
+				if (sum > best_sum) {
+					best_sum = sum;
+					value_to_split = cut_point;
+					if (Util.DEBUG) System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
+					split_index = index;
+				}
+			} else {}		
+			f1 = f2;
+			index++;
+		}
+		splitDomain.addPseudoValue(value_to_split);
+		Util.insert(split_indices, Integer.valueOf(split_index));
+		if (Util.DEBUG) {
+			System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
+			for(Integer i : split_indices)
+				System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
+			System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
+					splitAttr+": "+ splitDomain.getValues().size());
+		}
+		return best_sum;
+	}
+	
+	public static double info_contattr_rec(List<Fact> facts,
+			Domain splitDomain, Domain<?> targetDomain, 
+			FactTargetDistribution facts_in_class, 
+			List<Integer> split_indices,
+			List<Fact> split_facts) {
+	
+		String splitAttr = splitDomain.getName();
+		List<?> splitValues = splitDomain.getValues();
+		String targetAttr = targetDomain.getName();
+		List<?> targetValues = targetDomain.getValues();
+		if (Util.DEBUG) {
+			System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
+			int f_i=0;
+			for(Fact f: facts) {
+				System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
+				f_i++;
+			}
+		}
+
+		if (facts.size() <= 1) {
+			System.out
+					.println("The size of the fact list is 0 oups??? exiting....");
+			System.exit(0);
+		}
+		if (split_facts.size() < 1) {
+			System.out
+					.println("The size of the splits is 0 oups??? exiting....");
+			System.exit(0);
+		}
+		
+		/* initialize the distribution */
+		Object key0 = Integer.valueOf(0);
+		Object key1 = Integer.valueOf(1);
+		List<Object> keys = new ArrayList<Object>(2);
+		keys.add(key0);
+		keys.add(key1);
+		
+		
+		FactDistribution facts_at_attribute = new FactDistribution(keys, targetValues);
+		facts_at_attribute.setTotal(facts.size());
+		facts_at_attribute.setTargetDistForAttr(key1, facts_in_class);
+		facts_at_attribute.setSumForAttr(key1, facts.size());
+		
+		double best_sum = -100000.0;
+		Object value_to_split = splitValues.get(0);
+		int split_index =1, index = 1;
+		FactDistribution best_distribution;
+		Iterator<Fact> f_ite = facts.iterator();
+		Fact f1 = f_ite.next();
+		Comparator<Fact> targetComp = f1.getDomain(targetAttr).factComparator();
+		if (Util.DEBUG)	System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
+		while (f_ite.hasNext()) {/* 2. Look for potential cut-points. */
+
+			Fact f2 = f_ite.next();
+			if (Util.DEBUG) System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
+			Object targetKey = f2.getFieldValue(targetAttr);
+			
+			// System.out.println("My key: "+ targetKey.toString());
+			//for (Object attr_key : attr_values)
+			
+			/* every time it change the place in the distribution */
+			facts_at_attribute.change(key0, targetKey, +1);
+			facts_at_attribute.change(key1, targetKey, -1);
+	
+			/*
+			 * 2.1 Cut points are points in the sorted list above where the class labels change. 
+			 * Eg. if I had five instances with values for the attribute of interest and labels 
+			 * (1.0,A), (1.4,A), (1.7, A), (2.0,B), (3.0, B), (7.0, A), then there are only
+			 * two cutpoints of interest: 1.85 and 5 (mid-way between the points
+			 * where the classes change from A to B or vice versa).
+			 */
+			
+			if ( targetComp.compare(f1, f2)!=0) {
+				// the cut point
+				Number cp_i = (Number) f1.getFieldValue(splitAttr);
+				Number cp_i_next = (Number) f2.getFieldValue(splitAttr);
+
+				Number cut_point = (Double)(cp_i.doubleValue() + cp_i_next.doubleValue()) / 2;
+				
+				/*
+				 * 3. Evaluate your favourite disparity measure 
+				 * (info gain, gain ratio, gini coefficient, chi-squared test) on the cut point
+				 * and calculate its gain 
+				 */
+				double sum = calc_info_attr(facts_at_attribute);
+				//System.out.println("**entropy.info_contattr() FOUND: "+ sum + " best sum "+best_sum + 
+				if (Util.DEBUG) System.out.println("  **Try "+ sum + " best sum "+best_sum + 
+				" value ("+ f1.getFieldValue(splitAttr) +"-|"+ value_to_split+"|-"+ f2.getFieldValue(splitAttr)+")");
+				
+				if (sum > best_sum) {
+					best_sum = sum;
+					value_to_split = cut_point;
+					if (Util.DEBUG) System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
+					split_index = index;
+					best_distribution = facts_at_attribute.clone();
+				}
+			} else {}		
+			f1 = f2;
+			index++;
+		}
+		splitDomain.addPseudoValue(value_to_split);
+		Util.insert(split_indices, Integer.valueOf(split_index));
+		/*
+		 * info_contattr_rec(List<Fact> facts,
+			Domain splitDomain, Domain<?> targetDomain, 
+			FactTargetDistribution facts_in_class, 
+			List<Integer> split_indices,
+			List<Fact> split_facts)
+		 */
+//		info_contattr_rec(facts.subList(0, split_index),
+//				splitDomain, targetDomain, 
+//				best_distribution.getAttrFor(key0), 
+//				split_indices,
+//				split_facts);
+		
+		
+		if (Util.DEBUG) {
+			System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
+			for(Integer i : split_indices)
+				System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
+			System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
+					splitAttr+": "+ splitDomain.getValues().size());
+		}
+		return best_sum;
+	}
+	
 	/*
 	 * GLOBAL DISCRETIZATION a a b a b b b b b (target) 1 2 3 4 5 6 7 8 9 (attr
 	 * c) 0 0 0 0 1 1 1 1 1 "<5", ">=5" "true" "false"
@@ -90,7 +325,7 @@
 	 * instances of a single class or (b) some stopping criterion is reached. I
 	 * can't remember what stopping criteria they used.
 	 */
-	public static double info_contattr(List<Fact> facts,
+	public static double info_contattr_old (List<Fact> facts,
 			Domain splitDomain, Domain<?> targetDomain, 
 			Hashtable<Object, Integer> facts_in_class, 
 			List<Integer> split_indices,
@@ -100,11 +335,13 @@
 		List<?> splitValues = splitDomain.getValues();
 		String targetAttr = targetDomain.getName();
 		List<?> targetValues = targetDomain.getValues();
-		System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
-		int f_i=0;
-		for(Fact f: facts) {
-			System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
-			f_i++;
+		if (Util.DEBUG) {
+			System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
+			int f_i=0;
+			for(Fact f: facts) {
+				System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
+				f_i++;
+			}
 		}
 
 		if (facts.size() <= 1) {
@@ -137,11 +374,11 @@
 		Iterator<Fact> f_ite = facts.iterator();
 		Fact f1 = f_ite.next();
 		Comparator<Fact> targetComp = f1.getDomain(targetAttr).factComparator();
-		System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
+		if (Util.DEBUG)	System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
 		while (f_ite.hasNext()) {/* 2. Look for potential cut-points. */
 
 			Fact f2 = f_ite.next();
-			System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
+			if (Util.DEBUG) System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
 			Object targetKey = f2.getFieldValue(targetAttr);
 			
 			// System.out.println("My key: "+ targetKey.toString());
@@ -173,13 +410,13 @@
 				 */
 				double sum = calc_info_attr(facts_at_attribute);
 				//System.out.println("**entropy.info_contattr() FOUND: "+ sum + " best sum "+best_sum + 
-				System.out.println("  **Try "+ sum + " best sum "+best_sum + 
+				if (Util.DEBUG) System.out.println("  **Try "+ sum + " best sum "+best_sum + 
 				" value ("+ f1.getFieldValue(splitAttr) +"-|"+ value_to_split+"|-"+ f2.getFieldValue(splitAttr)+")");
 				
 				if (sum > best_sum) {
 					best_sum = sum;
 					value_to_split = cut_point;
-					System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
+					if (Util.DEBUG) System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
 					split_index = index;
 				}
 			} else {}		
@@ -188,11 +425,13 @@
 		}
 		splitDomain.addPseudoValue(value_to_split);
 		Util.insert(split_indices, Integer.valueOf(split_index));
-		System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
-		for(Integer i : split_indices)
-			System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
-		System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
-				splitAttr+": "+ splitDomain.getValues().size());
+		if (Util.DEBUG) {
+			System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
+			for(Integer i : split_indices)
+				System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
+			System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
+					splitAttr+": "+ splitDomain.getValues().size());
+		}
 		return best_sum;
 	}
 	
@@ -216,7 +455,7 @@
 			} else {
 				gain = dt_info - info_attr(facts, dt.getDomain(attr), targetDomain);
 			}
-			System.out.println("Attribute: " + attr + " the gain: " + gain);
+			if (Util.DEBUG)	System.out.println("Attribute: " + attr + " the gain: " + gain);
 			if (gain > greatestGain) {
 				greatestGain = gain;
 				attributeWithGreatestGain = attr;
@@ -235,7 +474,7 @@
 		String target = targetDomain.getName();
 		List<?> targetValues = targetDomain.getValues();
 		
-		System.out.println("What is the attributeToSplit? " + attributeToSplit);
+		if (Util.DEBUG) System.out.println("What is the attributeToSplit? " + attributeToSplit);
 
 		/* initialize the hashtable */
 		FactDistribution facts_at_attribute = new FactDistribution(attributeValues, targetValues);
@@ -295,6 +534,25 @@
 		}
 		return sum;
 	}
+	/* you can calculate this before */
+	public static double calc_info(FactTargetDistribution facts_in_class) {
+		
+		int total_num_facts = facts_in_class.getSum();
+		Collection<Object> targetValues = facts_in_class.getTargetClasses();
+		double prob, sum = 0;
+		for (Object key : targetValues) {
+			int num_in_class = facts_in_class.getVoteFor(key);
+			// System.out.println("num_in_class : "+ num_in_class + " key "+ key+ " and the total num "+ total_num_facts);
+			
+			if (num_in_class > 0) {
+				prob = (double) num_in_class / (double) total_num_facts;
+				/* TODO what if it is a sooo small number ???? */
+				sum +=  -1 * prob * Util.log2(prob);
+			// System.out.println("prob "+ prob +" and the plog(p)"+plog2p+"where the sum: "+sum);
+			}
+		}
+		return sum;
+	}
 	
 	private static List<Fact> getSplitPoints(List<Fact> facts, String target) {
 		List<Fact> splits = new ArrayList<Fact>();

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -51,7 +51,7 @@
 	internalprocess(attributestoprocess)
 	 */
 
-	public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+	public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, List<String> workingAttributes) {
 
 		DecisionTree dt = new DecisionTree(klass.getName());
 //		**OPT		List<FactSet> facts = new ArrayList<FactSet>();
@@ -94,7 +94,7 @@
 	}
 
 	
-	public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+	public DecisionTree build(WorkingMemory wm, String klass, String targetField, List<String> workingAttributes) {
 
 		DecisionTree dt = new DecisionTree(klass);
 //		**OPT		List<FactSet> facts = new ArrayList<FactSet>();
@@ -165,6 +165,7 @@
 			//*OPT*			return new LeafNode(facts.get(0).getFact(0).getFieldValue(target));
 			LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
 			classifiedNode.setRank((double)facts.size()/(double)num_fact_processed);
+			classifiedNode.setNumSupporter(facts.size());
 			return classifiedNode;
 		}
 
@@ -173,6 +174,7 @@
 			/* an heuristic of the leaf classification*/
 			LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
 			noAttributeLeftNode.setRank((double)winner_vote/(double)num_fact_processed);
+			noAttributeLeftNode.setNumSupporter(winner_vote);
 			return noAttributeLeftNode;
 		}
 
@@ -205,7 +207,8 @@
 			if (filtered_facts.get(value).isEmpty()) {
 				/* majority !!!! */
 				LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
-				majorityNode.setRank(0.0);
+				majorityNode.setRank(-1.0);
+				majorityNode.setNumSupporter(filtered_facts.get(value).size());
 				currentNode.addNode(value, majorityNode);
 			} else {
 				TreeNode newNode = id3(dt, filtered_facts.get(value), attributeNames_copy);

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -63,12 +63,16 @@
 	}
 	
 	public Object readString(String data) {
-		System.out.print("What is the data : "+ data);
+		//System.out.print("What is the data : "+ data);
 		if (isValid(data)) {
 			if (data.trim().equalsIgnoreCase("true"))
 				return Boolean.TRUE;
 			else if ((data.trim().equalsIgnoreCase("false")))
 				return Boolean.FALSE;
+			else if (data.trim().equalsIgnoreCase("1"))
+				return Boolean.TRUE;
+			else if (data.trim().equalsIgnoreCase("0"))
+				return Boolean.FALSE;
 			else
 				return Boolean.parseBoolean(data);
 		}else 

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -6,6 +6,7 @@
 @Target({ElementType.METHOD, ElementType.FIELD})
 public @interface DomainSpec {
 	    int readingSeq();
+	    boolean ignore() default false;
 	    boolean target() default false;
 	    boolean discrete() default true;
 	    String[] values() default {"bok"};

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -26,6 +26,10 @@
 		
 	}
 	
+	public FactDistribution clone() {
+		throw new UnsupportedOperationException("FactDistribution.clone() not implemented; previous body called itself recursively (StackOverflowError)");
+	}
+	
 	public void setTotal(int size) {
 		this.total_num = size;	
 	}
@@ -49,6 +53,11 @@
 		for (Object target: targetDist.keySet())
 			facts_at_attr.get(attr_value).put(target,targetDist.get(target));
 	}
+	
+	public void setTargetDistForAttr(Object attr_value, FactTargetDistribution targetDist) {
+		for (Object target: targetDist.getTargetClasses())
+			facts_at_attr.get(attr_value).put(target,targetDist.getVoteFor(target));
+	}
 
 	public void change(Object attrValue, Object targetValue, int i) {
 		int num_1 = facts_at_attr.get(attrValue).get(targetValue).intValue();

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -1,6 +1,8 @@
 package dt.memory;
 
 import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
@@ -244,9 +246,15 @@
 		OOFactSet fs = wm.getFactSet(klass);
 		Collection<Domain<?>> domains = fs.getDomains();
 		
-		BufferedReader reader = new BufferedReader(new InputStreamReader(
-				klass.getResourceAsStream(filename)));// "../data/"
-		// +
+		File file =new File(filename);
+		if(!file.exists()){
+			System.err.println("Data file not found: "+ filename);
+			System.exit(1);
+		}
+		BufferedReader reader; 
+
+		reader = new BufferedReader(new FileReader(filename));	
+		
 		String line;
 		while ((line = reader.readLine()) != null) {
 			// Fact newFact = fromString(line,domains,separator);

Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java	                        (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -0,0 +1,107 @@
+package dt.memory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Hashtable;
+import java.util.List;
+
+import dt.tools.Util;
+
+public class FactTargetDistribution {
+	
+	private String attr_sum = Util.sum();
+	private Domain<?> targetDomain;
+	private Hashtable<Object, Integer> num_at_target;
+	private Hashtable<Object, List<Fact>> facts_at_target;
+	
+	private int num_supported_target_classes;
+	private Object the_winner_target_class;
+	
+	public FactTargetDistribution(Domain<?> targetDomain) {
+		
+//		this.targetDomain = targetDomain.clone();
+//		targetDomain.
+		
+		num_supported_target_classes = 0;
+		this.targetDomain = targetDomain;
+		List<?> targetValues = targetDomain.getValues();
+		num_at_target =  new Hashtable<Object, Integer>(targetValues.size() + 1);
+		facts_at_target = new Hashtable<Object, List<Fact>>(targetValues.size());
+		for (Object t : targetValues) {
+			num_at_target.put(t, 0);
+			facts_at_target.put(t, new ArrayList<Fact>());
+		}
+		num_at_target.put(attr_sum, 0);
+		
+	}
+	
+	public void calculateDistribution(List<Fact> facts){
+		int total_num_facts = 0;
+		String target = targetDomain.getName();
+		for (Fact f : facts) {
+			total_num_facts++;
+			Object key = f.getFieldValue(target);
+			// System.out.println("My key: "+ key.toString());
+			num_at_target.put(key, num_at_target.get(key).intValue() + 1); // bocuk
+			facts_at_target.get(key).add(f);
+
+		}
+		num_at_target.put(attr_sum, num_at_target.get(attr_sum).intValue() + total_num_facts);
+		
+	}
+	public Collection<Object> getTargetClasses() {
+		return facts_at_target.keySet();
+	}
+	public int getSum() {
+		return num_at_target.get(attr_sum).intValue();
+	}
+	
+	public int getVoteFor(Object value) {
+		return num_at_target.get(value).intValue();
+	}
+	
+	public List<Fact> getSupportersFor(Object value) {
+		return facts_at_target.get(value);
+	}
+	public void evaluateMajority() {
+		
+		List<?> targetValues = targetDomain.getValues();
+		int winner_vote = 0;
+		int num_supporters = 0;
+		
+		Object winner = null;
+		for (Object key : targetValues) {
+
+			int num_in_class = num_at_target.get(key).intValue();
+			if (num_in_class > 0)
+				num_supporters++;
+			if (num_in_class > winner_vote) {
+				winner_vote = num_in_class;
+				winner = key;
+			}
+		}
+		setNum_supperted_target_classes(num_supporters);
+		setThe_winner_target_class(winner);
+		
+	}
+
+	public int getNum_supported_target_classes() {
+		return num_supported_target_classes;
+	}
+
+	public void setNum_supperted_target_classes(int num_supperted_target_classes) {
+		this.num_supported_target_classes = num_supperted_target_classes;
+	}
+
+	public Object getThe_winner_target_class() {
+		return the_winner_target_class;
+	}
+
+	public void setThe_winner_target_class(Object the_winner_target_class) {
+		this.the_winner_target_class = the_winner_target_class;
+	}
+	
+	
+	
+
+}

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -2,14 +2,13 @@
 
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.List;
-import java.util.ListIterator;
 
 import dt.memory.Domain;
 import dt.memory.Fact;
+import dt.memory.FactTargetDistribution;
 
 public class FactProcessor {
 
@@ -22,8 +21,7 @@
 			return FactProcessor.splitFacts_cont(facts, choosenDomain);
 		}
 	}
-	public static Hashtable<Object, List<Fact>> splitFacts_disc(
-			List<Fact> facts, Domain<?> choosenDomain) {
+	public static Hashtable<Object, List<Fact>> splitFacts_disc(List<Fact> facts, Domain<?> choosenDomain) {
 		String attributeName = choosenDomain.getName();
 		List<?> attributeValues = choosenDomain.getValues();
 		Hashtable<Object, List<Fact>> factLists = new Hashtable<Object, List<Fact>>(attributeValues.size());
@@ -42,20 +40,22 @@
 		
 		String attributeName = attributeDomain.getName();
 		
-		System.out.println("FactProcessor.splitFacts_cont() attr_split "+ attributeName);
+		if (Util.DEBUG) System.out.println("FactProcessor.splitFacts_cont() attr_split "+ attributeName);
 		
 		List<?> categorization = attributeDomain.getValues();
 		List<Integer> split_indices = attributeDomain.getIndices();
-		System.out.println("FactProcessor.splitFacts_cont() haniymis benim repsentativelerim: "+ categorization.size() + " and the split points "+ split_indices.size());
-		
-		System.out.println("FactProcessor.splitFacts_cont() before splitting "+ facts.size());
-		int split_i =0;
-		for(int i=0; i<facts.size(); i++) {
-			if (split_i<split_indices.size() && split_indices.get(split_i).intValue()== i) {
-				System.out.println("PRINT*: FactProcessor.splitFacts_cont() will split at "+i + " the fact "+facts.get(i));
-				split_i ++;
-			} else {
-				System.out.println("PRINT: FactProcessor.splitFacts_cont() at "+i + " the fact "+facts.get(i));
+		if (Util.DEBUG) {
+			System.out.println("FactProcessor.splitFacts_cont() haniymis benim repsentativelerim: "+ categorization.size() + " and the split points "+ split_indices.size());
+			
+			System.out.println("FactProcessor.splitFacts_cont() before splitting "+ facts.size());
+			int split_i =0;
+			for(int i=0; i<facts.size(); i++) {
+				if (split_i<split_indices.size() && split_indices.get(split_i).intValue()== i) {
+					System.out.println("PRINT*: FactProcessor.splitFacts_cont() will split at "+i + " the fact "+facts.get(i));
+					split_i ++;
+				} else {
+					System.out.println("PRINT: FactProcessor.splitFacts_cont() at "+i + " the fact "+facts.get(i));
+				}
 			}
 		}
 		
@@ -68,16 +68,23 @@
 		Iterator<Integer> splits_it = split_indices.iterator();
 		int start_point = 0;
 		int index = 0;
-		while (splits_it.hasNext()) {
-			int integer_index = splits_it.next().intValue();
+		
+		while (splits_it.hasNext() || index < attributeDomain.getValues().size()) {
+			int integer_index;
+			if (splits_it.hasNext())
+				integer_index = splits_it.next().intValue();
+			else
+				integer_index = facts.size();
+			
 			Object category = attributeDomain.getValues().get(index);
 			//System.out.println("FactProcessor.splitFacts_cont() new category: "+ category);
 			Fact pseudo = new Fact();
 			try {
 				pseudo.add(attributeDomain, category);
-				
-				System.out.println("FactProcessor.splitFacts_cont() new category: "+ category );
-				System.out.println(" ("+start_point+","+integer_index+")");
+				if (Util.DEBUG) {
+					System.out.println("FactProcessor.splitFacts_cont() new category: "+ category );
+					System.out.println(" ("+start_point+","+integer_index+")");
+				}
 				factLists.put(category, facts.subList(start_point, integer_index));
 				start_point = integer_index;
 
@@ -88,53 +95,31 @@
 			index++;
 			
 		}
+		
+		
 		return factLists;
 	}
 
-	/* it must work */
-	private static Hashtable<Object, List<Fact>> splitFacts_cont_(
-			List<Fact> facts, Domain<?> attributeDomain) {
+	public static void splitUnclassifiedFacts(
+			List<Fact> unclassified_facts, FactTargetDistribution stats) {
 		
-		String attributeName = attributeDomain.getName();
-		
-		System.out.println("FactProcessor.splitFacts_cont() kimi diziyoruz: "+ attributeName);
-		
-		List<?> categorization = attributeDomain.getValues();
-		System.out.println("FactProcessor.splitFacts_cont() haniymis benim repsentativelerim: "+ categorization.size());
-		
-		Hashtable<Object, List<Fact>> factLists = new Hashtable<Object, List<Fact>>(categorization.size());
-		for (Object v: attributeDomain.getValues()) {
-			factLists.put(v, new ArrayList<Fact>());
-		}
-		
-		Comparator<Fact> cont_comp = attributeDomain.factComparator();
-		Iterator<?> category_it = attributeDomain.getValues().iterator();
-		int start_point = 0;
-		while (category_it.hasNext()) {
-			Object category = category_it.next();
-			System.out.println("FactProcessor.splitFacts_cont() new category: "+ category);
-			Fact pseudo = new Fact();
-			try {
-				pseudo.add(attributeDomain, category);
-				int insertion_point_1 = Collections.binarySearch(facts, pseudo, cont_comp);
-				if (insertion_point_1 < 0)
-					factLists.put(category, facts.subList(start_point, -1*insertion_point_1));
-				else {
-					
-					System.out.println("FactProcessor.splitFacts_cont() last category: "+ 
-							category + " the point "+-1*insertion_point_1 + " the size "+ facts.size());
-					factLists.put(category, facts.subList(start_point, insertion_point_1));
-					break;
-				}
-				start_point = -1* insertion_point_1;
-
-			} catch (Exception e) {
-				// TODO Auto-generated catch block
-				e.printStackTrace();
-			}
+		Object winner = stats.getThe_winner_target_class();
+		System.out.println(Util.ntimes("DANIEL", 2)+ " lets get unclassified daniel winner "+winner +" num of sup "  +stats.getVoteFor(winner));
+		for (Object looser: stats.getTargetClasses()) {
+			int num_supp = stats.getVoteFor(looser);
 			
+			if ((num_supp > 0) && !winner.equals(looser)) {
+				
+				System.out.println(Util.ntimes("DANIEL", 2)+ " one looser ? "+looser + " num of sup="+num_supp);
+				//System.out.println(" the num of supporters = "+ stats.getVoteFor(looser));
+				//System.out.println(" but the guys "+ stats.getSupportersFor(looser));
+				//System.out.println("How many bok: "+stats.getSupportersFor(looser).size());
+				unclassified_facts.addAll(stats.getSupportersFor(looser));
+			} else
+				System.out.println(Util.ntimes("DANIEL", 5)+ "how many times matching?? not a looser "+ looser );
 		}
-		return factLists;
+		
+		@SuppressWarnings("unused")
+		int bok = 1;
 	}
-
 }

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -23,7 +23,8 @@
 			System.out.println("Time" + dt + "\n" + bocuksTree);
 
 			RulePrinter my_printer = new RulePrinter();
-			my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile);
+			boolean sort_via_rank = true;
+			my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile, sort_via_rank);
 			
 			return obj_read;
 			
@@ -45,12 +46,15 @@
 			long dt = System.currentTimeMillis();
 			String target_attr = ObjectReader.getTargetAnnotation(emptyObject.getClass());
 			
-			DecisionTree bocuksTree = bocuk.build(simple, emptyObject.getClass().getName(), target_attr, null);
+			List<String> workingAttributes= ObjectReader.getWorkingAttributes(emptyObject.getClass());
+			
+			DecisionTree bocuksTree = bocuk.build(simple, emptyObject.getClass().getName(), target_attr, workingAttributes);
 			dt = System.currentTimeMillis() - dt;
 			System.out.println("Time" + dt + "\n" + bocuksTree);
 
-			RulePrinter my_printer = new RulePrinter();
-			my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile);
+			RulePrinter my_printer = new RulePrinter(bocuk.getNum_fact_processed());
+			boolean sort_via_rank = true;
+			my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile, sort_via_rank);
 			
 			return obj_read;
 			

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -6,6 +6,7 @@
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.lang.reflect.Modifier;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Iterator;
@@ -178,6 +179,29 @@
 		return null;
 	}
 
+	public static List<String> getWorkingAttributes(Class<? extends Object> classObj) {
+		Field [] element_fields = classObj.getDeclaredFields();
+		ArrayList<String> attributes = new ArrayList<String>(element_fields.length) ;
+		for( Field f: element_fields) {
+			String f_name = f.getName();
+			Class<?>[] f_class = {f.getType()};
+			if (Util.isSimpleType(f_class)) {
+				Annotation[] annotations = f.getAnnotations();
+				
+				// iterate over the annotations to locate the MaxLength constraint if it exists
+				DomainSpec spec = null;
+				for (Annotation a : annotations) {
+				    if (a instanceof DomainSpec) {
+				        spec = (DomainSpec)a; // here it is !!!
+				        if (!spec.ignore())
+				        	attributes.add(f_name);
+				    }
+				}
+			}
+		}
+		return attributes;
+	}
+	
 	//read(Class<?> element_class, Collection<Domain<?>> collection, String data, String separator)
 	public static Object read_(Class<?> element_class, Collection<Domain<?>> domains, String data, String separator) {
 
@@ -455,7 +479,4 @@
 			throw new IOException("field assignment failure:" + e);
 		}
 	}
-
-	
-
 }

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -22,8 +22,23 @@
 	private Stack<NodeValue> nodes;
 	
 	private Object ruleObject;
+	
+	private boolean ONLY_ACTIVE = true;
+	private int num_facts; 
 	//private RuleComparator rule_comp = new RuleComparator();
 	
+	
+	public RulePrinter(int num_facts) {
+		ruleText = new ArrayList<String>();
+		//rule_list = new ArrayList<ArrayList<NodeValue>>();
+		rules = new ArrayList<Rule>();
+		
+		/* most important */
+		nodes = new Stack<NodeValue>();
+		
+		this.num_facts = num_facts;
+	}
+	
 	public RulePrinter() {
 		ruleText = new ArrayList<String>();
 		//rule_list = new ArrayList<ArrayList<NodeValue>>();
@@ -33,17 +48,10 @@
 		nodes = new Stack<NodeValue>();
 	}
 	
-	public void printer(DecisionTree dt, String packageName, String outputFile) {//, PrintStream object
+	public void printer(DecisionTree dt, String packageName, String outputFile, boolean sort) {//, PrintStream object
 		ruleObject = dt.getName();
 		dfs(dt.getRoot());
-		
-//		int j = 0;
-//		for( String rule: ruleText) {
-//			j++;
-//			System.out.println("Rule " +j + " suggests that \n"+ rule +".\n");
-//		}
-		
-		//String outputFile = new String("src/id3/rules"+".drl");
+	
 		if (outputFile!=null) {
 			if (packageName != null)
 				write("package " + packageName +";\n\n", false, outputFile);
@@ -54,21 +62,37 @@
 					// TODO Auto-generated catch block
 					e.printStackTrace();
 				}
-//			write("/* \n", false, outputFile);
-//			write(" * Spitting the rules= \n", true, outputFile);
-//			write(" */ \n", true, outputFile);
 		}
 		
+		if (sort)
+			Collections.sort(rules, Rule.getRankComparator());
+		
+		int total_num_facts=0;
 		int i = 0;
-		//Collections.sort(rules, Rule.getRankComparator());
 		for( Rule rule: rules) {
 			i++;
-			System.out.println("//rule " +i + " write to drl \n"+ rule +"\n");
-			if (outputFile!=null) {
-				write(rule.toString(), true, outputFile);
-				write("\n", true, outputFile);
+			if (ONLY_ACTIVE) {
+				if (rule.getRank() >= 0) {
+					System.out.println("//Active rules " +i + " write to drl \n"+ rule +"\n");
+					if (outputFile!=null) {
+						write(rule.toString(), true, outputFile);
+						write("\n", true, outputFile);
+					}
+				}
+
+			} else {
+				System.out.println("//rule " +i + " write to drl \n"+ rule +"\n");
+				if (outputFile!=null) {
+					write(rule.toString(), true, outputFile);
+					write("\n", true, outputFile);
+				}
 			}
+			total_num_facts += rule.getPopularity();
 		}
+		if (outputFile!=null) {
+			write("//THE END: Total number of facts correctly classified= "+ total_num_facts, true, outputFile);
+			write("\n", true, outputFile); // EOF
+		}
 	}
 	public Object getRuleObject() {
 		return ruleObject;
@@ -195,6 +219,7 @@
 	private int id;
 	private String attr_obj;
 	private double rank;
+	private double popularity;
 	private ArrayList<NodeValue> conditions;
 	private ArrayList<NodeValue>  actions;
 	
@@ -213,6 +238,7 @@
 	public void addAction(NodeValue current) {
 		actions.add(new NodeValue(current.getNode(), current.getNodeValue()));
 		rank = ((LeafNode)current.getNode()).getRank();
+		popularity = ((LeafNode)current.getNode()).getNum_facts_classified();
 	}
 	public void setObject(String obj) {
 		attr_obj= obj;
@@ -231,6 +257,13 @@
 		this.id= id;
 	}
 	
+	public double getPopularity() {
+		return popularity;
+	}
+
+	public void setPopularity(double popularity) {
+		this.popularity = popularity;
+	}
 	
 	
 	public String toString() {
@@ -243,8 +276,9 @@
 				System.out.println( "Goodbye: " + message ); 
 		end
 		 */
-		
+			
 		String out = ""; //"rule \"#"+getId()+" "+decision+" rank:"+rank+"\" \n";
+
 		out += "\t when";
 		out += "\n\t\t "+getObject() +"("+ "";
 		for (NodeValue cond: conditions) {
@@ -263,9 +297,10 @@
 		
 		out += "\t then ";
 		out += "\n\t\t System.out.println(\"Decision on "+decision+"= \"+" + decision + "+\": ("+action+")\");\n";
+		if (getRank() <0)
+			out += "\n\t\t System.out.println(\"But no matching fact found = DOES not fire on\");\n";
+		out = "rule \"#"+getId()+" "+decision+ "= "+action+" classifying "+getPopularity()+" num of facts with rank:"+getRank() +"\" \n" + out;
 		
-		out = "rule \"#"+getId()+" "+decision+ "= "+action+" with rank:"+rank+"\" \n" + out;
-		
 		out += "end\n";
 
 		return out;

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -7,6 +7,8 @@
 
 public class Util {
 	
+	public static boolean DEBUG = false;
+	
 	public static String ntimes(String s,int n){
 		StringBuffer buf = new StringBuffer();
 		for (int i = 0; i < n; i++) {

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -40,7 +40,8 @@
 			//System.out.println(bocuksTree);
 
 			RulePrinter my_printer = new RulePrinter();
-			my_printer.printer(bocuksTree, null, null);
+			boolean sort_via_rank = true;
+			my_printer.printer(bocuksTree, null, null, sort_via_rank);
 		}
 	}
 

Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java	2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java	2008-04-01 23:53:50 UTC (rev 19371)
@@ -46,6 +46,7 @@
 		System.out.println("Time"+dt+"\n"+bocuksTree);
 		
 		RulePrinter my_printer = new RulePrinter();
-		my_printer.printer(bocuksTree,"test" , new String("../dt_learning/src/test/rules"+".drl"));
+		boolean sort_via_rank = true;
+		my_printer.printer(bocuksTree,"test" , new String("../dt_learning/src/test/rules"+".drl"), sort_via_rank);
 	}
 }




More information about the jboss-svn-commits mailing list