[jboss-svn-commits] JBL Code SVN: r19371 - in labs/jbossrules/contrib/machinelearning/decisiontree/src: dt/builder and 3 other directories.
jboss-svn-commits at lists.jboss.org
jboss-svn-commits at lists.jboss.org
Tue Apr 1 19:53:50 EDT 2008
Author: gizil
Date: 2008-04-01 19:53:50 -0400 (Tue, 01 Apr 2008)
New Revision: 19371
Added:
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java
Modified:
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java
Log:
before recursive discretization
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -6,6 +6,7 @@
import dt.memory.Domain;
import dt.memory.Fact;
+import dt.memory.FactTargetDistribution;
import dt.tools.Util;
public class DecisionTree {
@@ -76,6 +77,15 @@
// *OPT* }
return facts_in_class;
}
+
+ // *OPT* public double getInformation(List<FactSet> facts) {
+ public FactTargetDistribution getDistribution(List<Fact> facts) {
+
+ FactTargetDistribution facts_in_class = new FactTargetDistribution(getDomain(getTarget()));
+ facts_in_class.calculateDistribution(facts);
+ FACTS_READ += facts.size();
+ return facts_in_class;
+ }
// *OPT* public double getInformation(List<FactSet> facts) {
/**
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -8,10 +8,12 @@
//represents leaf nodes with the target value
private Object targetValue;
private double rank;
+ private int num_facts_classified;
public LeafNode(Domain<?> targetDomain, Object value){
super(targetDomain);
this.targetValue = value;
+ num_facts_classified = 0;
}
public void addNode(Object attributeValue, TreeNode node) {
@@ -43,4 +45,13 @@
buf.append("DECISION -> " +targetValue.toString()+"\n");
return buf.toString();
}
+
+ public void setNumSupporter(int size) {
+ this.num_facts_classified= size;
+
+ }
+
+ public int getNum_facts_classified() {
+ return this.num_facts_classified;
+ }
}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -11,6 +11,7 @@
import dt.LeafNode;
import dt.TreeNode;
+import dt.memory.FactTargetDistribution;
import dt.memory.WorkingMemory;
import dt.memory.Fact;
import dt.memory.FactSet;
@@ -39,8 +40,9 @@
MyThread helper;
private int FUNC_CALL = 0;
- private int num_fact_processed = 0;
-
+ protected int num_fact_processed = 0;
+ private ArrayList<Fact> unclassified_facts;
+
/*
* treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
*
@@ -50,9 +52,18 @@
* internalprocess(attributestoprocess)
*/
+ public int getNum_fact_processed() {
+ return num_fact_processed;
+ }
+
+ public void setNum_fact_processed(int num_fact_processed) {
+ this.num_fact_processed = num_fact_processed;
+ }
+
public DecisionTree build(WorkingMemory wm, Class<?> klass,
- String targetField, Collection<String> workingAttributes) {
-
+ String targetField, List<String> workingAttributes) {
+
+ unclassified_facts = new ArrayList<Fact>();
DecisionTree dt = new DecisionTree(klass.getName());
// **OPT List<FactSet> facts = new ArrayList<FactSet>();
ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -94,8 +105,8 @@
}
public DecisionTree build(WorkingMemory wm, String klass,
- String targetField, Collection<String> workingAttributes) {
-
+ String targetField, List<String> workingAttributes) {
+ unclassified_facts = new ArrayList<Fact>();
DecisionTree dt = new DecisionTree(klass);
// **OPT List<FactSet> facts = new ArrayList<FactSet>();
ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -116,7 +127,7 @@
if (workingAttributes != null)
for (String attr : workingAttributes) {
- System.out.println("Bok degil " + attr);
+ //System.out.println("Bok degil " + attr);
dt.addDomain(klass_fs.getDomain(attr));
}
else
@@ -143,8 +154,27 @@
}
/* let's get the statistics of the results */
// List<?> targetValues = dt.getPossibleValues(dt.getTarget());
- Hashtable<Object, Integer> stats = dt.getStatistics(facts, dt
- .getTarget());// targetValues
+ //Hashtable<Object, Integer> stats_ = dt.getStatistics(facts, dt.getTarget());// targetValues
+
+ //FactTargetDistribution stats = dt.getDistribution(facts);
+
+ FactTargetDistribution stats = new FactTargetDistribution(dt.getDomain(dt.getTarget()));
+ stats.calculateDistribution(facts);
+
+ stats.evaluateMajority();
+//
+// Object winner1 = stats.getThe_winner_target_class();
+// for (Object looser: stats.getTargetClasses()) {
+// System.out.println(" the target class = "+ looser);
+// if (!winner1.equals(looser) && stats.getVoteFor(looser)>0) {
+// System.out.println(" the num of supporters = "+ stats.getVoteFor(looser));
+// System.out.println(" but the guys "+ stats.getSupportersFor(looser));
+// System.out.println("How many bok: "+stats.getSupportersFor(looser).size());
+// //unclassified_facts.addAll(stats.getSupportersFor(looser));
+// } else
+// System.out.println(Util.ntimes("DANIEL", 5)+ "how many times not matching?? not a looser "+ looser );
+// }
+ /*
Collection<Object> targetValues = stats.keySet();
int winner_vote = 0;
int num_supporters = 0;
@@ -159,24 +189,29 @@
winner = key;
}
}
+ *
/* if all elements are classified to the same value */
- if (num_supporters == 1) {
- // *OPT* return new
- // LeafNode(facts.get(0).getFact(0).getFieldValue(target));
- LeafNode classifiedNode = new LeafNode(
- dt.getDomain(dt.getTarget()), winner);
+ if (stats.getNum_supported_target_classes() == 1) {
+
+ LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), stats.getThe_winner_target_class());
classifiedNode.setRank((double) facts.size()/(double) num_fact_processed);
+ classifiedNode.setNumSupporter(facts.size());
+
return classifiedNode;
}
/* if there is no attribute left in order to continue */
if (attributeNames.size() == 0) {
/* an heuristic of the leaf classification */
- LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt
- .getTarget()), winner);
- noAttributeLeftNode.setRank((double) winner_vote
- / (double) num_fact_processed);
+ Object winner = stats.getThe_winner_target_class();
+ LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+ noAttributeLeftNode.setRank((double) stats.getVoteFor(winner)/ (double) num_fact_processed);
+ noAttributeLeftNode.setNumSupporter(stats.getVoteFor(winner));
+
+ /* we need to know how many guys cannot be classified and who these guys are */
+ FactProcessor.splitUnclassifiedFacts(unclassified_facts, stats);
+
return noAttributeLeftNode;
}
@@ -190,6 +225,12 @@
Hashtable<Object, List<Fact>> filtered_facts = FactProcessor.splitFacts(facts, choosenDomain);
+ for (Object value : filtered_facts.keySet()) {
+ if (filtered_facts.get(value).isEmpty()){
+ @SuppressWarnings("unused")
+ boolean bok = true;
+ }
+ }
dt.FACTS_READ += facts.size();
for (Object value : filtered_facts.keySet()) {
@@ -201,8 +242,9 @@
if (filtered_facts.get(value).isEmpty()) {
/* majority !!!! */
- LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
- majorityNode.setRank(0.0);
+ LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), stats.getThe_winner_target_class());
+ majorityNode.setRank(-1.0); // classifying nothing
+ majorityNode.setNumSupporter(filtered_facts.get(value).size());
currentNode.addNode(value, majorityNode);
} else {
TreeNode newNode = c45(dt, filtered_facts.get(value), attributeNames_copy);
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -1,6 +1,6 @@
package dt.builder;
-import java.util.Collection;
+import java.util.List;
import dt.DecisionTree;
import dt.memory.WorkingMemory;
@@ -8,8 +8,8 @@
public interface DecisionTreeBuilder {
- DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes);
+ DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, List<String> workingAttributes);
- DecisionTree build(WorkingMemory simple, String klass_name, String target_attr,Collection<String> workingAttributes);
+ DecisionTree build(WorkingMemory simple, String klass_name, String target_attr,List<String> workingAttributes);
}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -39,6 +39,8 @@
MyThread helper;
private int FUNC_CALL = 0;
private int num_fact_processed = 0;
+
+ private List<Fact> unclassified_facts;
/*
* treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
@@ -52,6 +54,7 @@
public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+ unclassified_facts = new ArrayList<Fact>();
DecisionTree dt = new DecisionTree(klass.getName());
// **OPT List<FactSet> facts = new ArrayList<FactSet>();
ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -106,6 +109,7 @@
public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+ unclassified_facts = new ArrayList<Fact>();
DecisionTree dt = new DecisionTree(klass);
// **OPT List<FactSet> facts = new ArrayList<FactSet>();
ArrayList<Fact> facts = new ArrayList<Fact>();
@@ -215,6 +219,7 @@
//*OPT* return new LeafNode(facts.get(0).getFact(0).getFieldValue(target));
LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
classifiedNode.setRank((double)facts.size()/(double)num_fact_processed);
+ classifiedNode.setNumSupporter(facts.size());
return classifiedNode;
}
@@ -223,6 +228,7 @@
/* an heuristic of the leaf classification*/
LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
noAttributeLeftNode.setRank((double)winner_vote/(double)num_fact_processed);
+ noAttributeLeftNode.setNumSupporter(winner_vote);
return noAttributeLeftNode;
}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/Entropy.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -12,15 +12,15 @@
import dt.memory.Domain;
import dt.memory.Fact;
import dt.memory.FactDistribution;
+import dt.memory.FactTargetDistribution;
import dt.tools.Util;
-import dt.memory.NumericDomain;
public class Entropy implements InformationMeasure {
public static Domain<?> chooseContAttribute(DecisionTree dt, List<Fact> facts,
- Hashtable<Object, Integer> facts_in_class, List<String> attrs) {
+ FactTargetDistribution facts_in_class, List<String> attrs) {
- double dt_info = calc_info(facts_in_class, facts.size());
+ double dt_info = calc_info(facts_in_class);
double greatestGain = -100000.0;
String attributeWithGreatestGain = attrs.get(0);
Domain attrDomain = dt.getDomain(attributeWithGreatestGain);
@@ -43,14 +43,14 @@
attrDomain = dt.getDomain(attr).clone();
attrDomain.addPseudoValue(facts.get(facts.size()-1).getFieldValue(attr));
- System.out.println("entropy.chooseContAttribute(1)*********** num of split for "+
- attr+": "+ attrDomain.getValues().size()+ " ("+ attrDomain.getValues().get(0)+")");
+// System.out.println("entropy.chooseContAttribute(1)*********** num of split for "+
+// attr+": "+ attrDomain.getValues().size()+ " ("+ attrDomain.getValues().get(0)+")");
split_indices = new ArrayList<Integer>();
- System.out.println("entropy.chooseContAttribute(BOK) size "+split_indices.size());
+ //System.out.println("entropy.chooseContAttribute(BOK) size "+split_indices.size());
gain = dt_info - info_contattr(facts, attrDomain, targetDomain,
facts_in_class, split_indices, splits);
- System.out.println("entropy.chooseContAttribute(2)*********** num of split for "+
- attr+": "+ attrDomain.getValues().size());
+// System.out.println("entropy.chooseContAttribute(2)*********** num of split for "+
+// attr+": "+ attrDomain.getValues().size());
}
if (gain > greatestGain) {
@@ -64,7 +64,242 @@
return bestDomain;
}
+ public static double info_contattr(List<Fact> facts,
+ Domain splitDomain, Domain<?> targetDomain,
+ FactTargetDistribution facts_in_class,
+ List<Integer> split_indices,
+ List<Fact> split_facts) {
+ String splitAttr = splitDomain.getName();
+ List<?> splitValues = splitDomain.getValues();
+ String targetAttr = targetDomain.getName();
+ List<?> targetValues = targetDomain.getValues();
+ if (Util.DEBUG) {
+ System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
+ int f_i=0;
+ for(Fact f: facts) {
+ System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
+ f_i++;
+ }
+ }
+
+ if (facts.size() <= 1) {
+ System.out
+ .println("The size of the fact list is 0 oups??? exiting....");
+ System.exit(0);
+ }
+ if (split_facts.size() < 1) {
+ System.out
+ .println("The size of the splits is 0 oups??? exiting....");
+ System.exit(0);
+ }
+
+ /* initialize the distribution */
+ Object key0 = Integer.valueOf(0);
+ Object key1 = Integer.valueOf(1);
+ List<Object> keys = new ArrayList<Object>(2);
+ keys.add(key0);
+ keys.add(key1);
+
+
+ FactDistribution facts_at_attribute = new FactDistribution(keys, targetValues);
+ facts_at_attribute.setTotal(facts.size());
+ facts_at_attribute.setTargetDistForAttr(key1, facts_in_class);
+ facts_at_attribute.setSumForAttr(key1, facts.size());
+
+ double best_sum = -100000.0;
+ Object value_to_split = splitValues.get(0);
+ int split_index =1, index = 1;
+ Iterator<Fact> f_ite = facts.iterator();
+ Fact f1 = f_ite.next();
+ Comparator<Fact> targetComp = f1.getDomain(targetAttr).factComparator();
+ if (Util.DEBUG) System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
+ while (f_ite.hasNext()) {/* 2. Look for potential cut-points. */
+
+ Fact f2 = f_ite.next();
+ if (Util.DEBUG) System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
+ Object targetKey = f2.getFieldValue(targetAttr);
+
+ // System.out.println("My key: "+ targetKey.toString());
+ //for (Object attr_key : attr_values)
+
+ /* every time it change the place in the distribution */
+ facts_at_attribute.change(key0, targetKey, +1);
+ facts_at_attribute.change(key1, targetKey, -1);
+
+ /*
+ * 2.1 Cut points are points in the sorted list above where the class labels change.
+ * Eg. if I had five instances with values for the attribute of interest and labels
+ * (1.0,A), (1.4,A), (1.7, A), (2.0,B), (3.0, B), (7.0, A), then there are only
+ * two cutpoints of interest: 1.85 and 5 (mid-way between the points
+ * where the classes change from A to B or vice versa).
+ */
+
+ if ( targetComp.compare(f1, f2)!=0) {
+ // the cut point
+ Number cp_i = (Number) f1.getFieldValue(splitAttr);
+ Number cp_i_next = (Number) f2.getFieldValue(splitAttr);
+
+ Number cut_point = (Double)(cp_i.doubleValue() + cp_i_next.doubleValue()) / 2;
+
+ /*
+ * 3. Evaluate your favourite disparity measure
+ * (info gain, gain ratio, gini coefficient, chi-squared test) on the cut point
+ * and calculate its gain
+ */
+ double sum = calc_info_attr(facts_at_attribute);
+ //System.out.println("**entropy.info_contattr() FOUND: "+ sum + " best sum "+best_sum +
+ if (Util.DEBUG) System.out.println(" **Try "+ sum + " best sum "+best_sum +
+ " value ("+ f1.getFieldValue(splitAttr) +"-|"+ value_to_split+"|-"+ f2.getFieldValue(splitAttr)+")");
+
+ if (sum > best_sum) {
+ best_sum = sum;
+ value_to_split = cut_point;
+ if (Util.DEBUG) System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
+ split_index = index;
+ }
+ } else {}
+ f1 = f2;
+ index++;
+ }
+ splitDomain.addPseudoValue(value_to_split);
+ Util.insert(split_indices, Integer.valueOf(split_index));
+ if (Util.DEBUG) {
+ System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
+ for(Integer i : split_indices)
+ System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
+ System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
+ splitAttr+": "+ splitDomain.getValues().size());
+ }
+ return best_sum;
+ }
+
+ public static double info_contattr_rec(List<Fact> facts,
+ Domain splitDomain, Domain<?> targetDomain,
+ FactTargetDistribution facts_in_class,
+ List<Integer> split_indices,
+ List<Fact> split_facts) {
+
+ String splitAttr = splitDomain.getName();
+ List<?> splitValues = splitDomain.getValues();
+ String targetAttr = targetDomain.getName();
+ List<?> targetValues = targetDomain.getValues();
+ if (Util.DEBUG) {
+ System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
+ int f_i=0;
+ for(Fact f: facts) {
+ System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
+ f_i++;
+ }
+ }
+
+ if (facts.size() <= 1) {
+ System.out
+ .println("The size of the fact list is 0 oups??? exiting....");
+ System.exit(0);
+ }
+ if (split_facts.size() < 1) {
+ System.out
+ .println("The size of the splits is 0 oups??? exiting....");
+ System.exit(0);
+ }
+
+ /* initialize the distribution */
+ Object key0 = Integer.valueOf(0);
+ Object key1 = Integer.valueOf(1);
+ List<Object> keys = new ArrayList<Object>(2);
+ keys.add(key0);
+ keys.add(key1);
+
+
+ FactDistribution facts_at_attribute = new FactDistribution(keys, targetValues);
+ facts_at_attribute.setTotal(facts.size());
+ facts_at_attribute.setTargetDistForAttr(key1, facts_in_class);
+ facts_at_attribute.setSumForAttr(key1, facts.size());
+
+ double best_sum = -100000.0;
+ Object value_to_split = splitValues.get(0);
+ int split_index =1, index = 1;
+ FactDistribution best_distribution;
+ Iterator<Fact> f_ite = facts.iterator();
+ Fact f1 = f_ite.next();
+ Comparator<Fact> targetComp = f1.getDomain(targetAttr).factComparator();
+ if (Util.DEBUG) System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
+ while (f_ite.hasNext()) {/* 2. Look for potential cut-points. */
+
+ Fact f2 = f_ite.next();
+ if (Util.DEBUG) System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
+ Object targetKey = f2.getFieldValue(targetAttr);
+
+ // System.out.println("My key: "+ targetKey.toString());
+ //for (Object attr_key : attr_values)
+
+ /* every time it change the place in the distribution */
+ facts_at_attribute.change(key0, targetKey, +1);
+ facts_at_attribute.change(key1, targetKey, -1);
+
+ /*
+ * 2.1 Cut points are points in the sorted list above where the class labels change.
+ * Eg. if I had five instances with values for the attribute of interest and labels
+ * (1.0,A), (1.4,A), (1.7, A), (2.0,B), (3.0, B), (7.0, A), then there are only
+ * two cutpoints of interest: 1.85 and 5 (mid-way between the points
+ * where the classes change from A to B or vice versa).
+ */
+
+ if ( targetComp.compare(f1, f2)!=0) {
+ // the cut point
+ Number cp_i = (Number) f1.getFieldValue(splitAttr);
+ Number cp_i_next = (Number) f2.getFieldValue(splitAttr);
+
+ Number cut_point = (Double)(cp_i.doubleValue() + cp_i_next.doubleValue()) / 2;
+
+ /*
+ * 3. Evaluate your favourite disparity measure
+ * (info gain, gain ratio, gini coefficient, chi-squared test) on the cut point
+ * and calculate its gain
+ */
+ double sum = calc_info_attr(facts_at_attribute);
+ //System.out.println("**entropy.info_contattr() FOUND: "+ sum + " best sum "+best_sum +
+ if (Util.DEBUG) System.out.println(" **Try "+ sum + " best sum "+best_sum +
+ " value ("+ f1.getFieldValue(splitAttr) +"-|"+ value_to_split+"|-"+ f2.getFieldValue(splitAttr)+")");
+
+ if (sum > best_sum) {
+ best_sum = sum;
+ value_to_split = cut_point;
+ if (Util.DEBUG) System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
+ split_index = index;
+ best_distribution = facts_at_attribute.clone();
+ }
+ } else {}
+ f1 = f2;
+ index++;
+ }
+ splitDomain.addPseudoValue(value_to_split);
+ Util.insert(split_indices, Integer.valueOf(split_index));
+ /*
+ * info_contattr_rec(List<Fact> facts,
+ Domain splitDomain, Domain<?> targetDomain,
+ FactTargetDistribution facts_in_class,
+ List<Integer> split_indices,
+ List<Fact> split_facts)
+ */
+// info_contattr_rec(facts.subList(0, split_index),
+// splitDomain, targetDomain,
+// best_distribution.getAttrFor(key0),
+// split_indices,
+// split_facts);
+
+
+ if (Util.DEBUG) {
+ System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
+ for(Integer i : split_indices)
+ System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
+ System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
+ splitAttr+": "+ splitDomain.getValues().size());
+ }
+ return best_sum;
+ }
+
/*
* GLOBAL DISCRETIZATION a a b a b b b b b (target) 1 2 3 4 5 6 7 8 9 (attr
* c) 0 0 0 0 1 1 1 1 1 "<5", ">=5" "true" "false"
@@ -90,7 +325,7 @@
* instances of a single class or (b) some stopping criterion is reached. I
* can't remember what stopping criteria they used.
*/
- public static double info_contattr(List<Fact> facts,
+ public static double info_contattr_old (List<Fact> facts,
Domain splitDomain, Domain<?> targetDomain,
Hashtable<Object, Integer> facts_in_class,
List<Integer> split_indices,
@@ -100,11 +335,13 @@
List<?> splitValues = splitDomain.getValues();
String targetAttr = targetDomain.getName();
List<?> targetValues = targetDomain.getValues();
- System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
- int f_i=0;
- for(Fact f: facts) {
- System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
- f_i++;
+ if (Util.DEBUG) {
+ System.out.println("entropy.info_cont() attributeToSplit? " + splitAttr);
+ int f_i=0;
+ for(Fact f: facts) {
+ System.out.println("entropy.info_cont() SORTING: "+f_i+" attr "+splitAttr+ " "+ f );
+ f_i++;
+ }
}
if (facts.size() <= 1) {
@@ -137,11 +374,11 @@
Iterator<Fact> f_ite = facts.iterator();
Fact f1 = f_ite.next();
Comparator<Fact> targetComp = f1.getDomain(targetAttr).factComparator();
- System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
+ if (Util.DEBUG) System.out.println("\nentropy.info_cont() SEARCHING: "+split_index+" attr "+splitAttr+ " "+ f1 );
while (f_ite.hasNext()) {/* 2. Look for potential cut-points. */
Fact f2 = f_ite.next();
- System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
+ if (Util.DEBUG) System.out.print("entropy.info_cont() SEARCHING: "+(index+1)+" attr "+splitAttr+ " "+ f2 );
Object targetKey = f2.getFieldValue(targetAttr);
// System.out.println("My key: "+ targetKey.toString());
@@ -173,13 +410,13 @@
*/
double sum = calc_info_attr(facts_at_attribute);
//System.out.println("**entropy.info_contattr() FOUND: "+ sum + " best sum "+best_sum +
- System.out.println(" **Try "+ sum + " best sum "+best_sum +
+ if (Util.DEBUG) System.out.println(" **Try "+ sum + " best sum "+best_sum +
" value ("+ f1.getFieldValue(splitAttr) +"-|"+ value_to_split+"|-"+ f2.getFieldValue(splitAttr)+")");
if (sum > best_sum) {
best_sum = sum;
value_to_split = cut_point;
- System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
+ if (Util.DEBUG) System.out.println(Util.ntimes("?", 10)+"** FOUND: target ("+ f1.getFieldValue(targetAttr) +"-|T|-"+ f2.getFieldValue(targetAttr)+")");
split_index = index;
}
} else {}
@@ -188,11 +425,13 @@
}
splitDomain.addPseudoValue(value_to_split);
Util.insert(split_indices, Integer.valueOf(split_index));
- System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
- for(Integer i : split_indices)
- System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
- System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
- splitAttr+": "+ splitDomain.getValues().size());
+ if (Util.DEBUG) {
+ System.out.println("entropy.info_contattr(BOK_last) split_indices.size "+split_indices.size());
+ for(Integer i : split_indices)
+ System.out.println("entropy.info_contattr(FOUNDS) split_indices "+i + " the fact "+facts.get(i));
+ System.out.println("entropy.chooseContAttribute(1.5)*********** num of split for "+
+ splitAttr+": "+ splitDomain.getValues().size());
+ }
return best_sum;
}
@@ -216,7 +455,7 @@
} else {
gain = dt_info - info_attr(facts, dt.getDomain(attr), targetDomain);
}
- System.out.println("Attribute: " + attr + " the gain: " + gain);
+ if (Util.DEBUG) System.out.println("Attribute: " + attr + " the gain: " + gain);
if (gain > greatestGain) {
greatestGain = gain;
attributeWithGreatestGain = attr;
@@ -235,7 +474,7 @@
String target = targetDomain.getName();
List<?> targetValues = targetDomain.getValues();
- System.out.println("What is the attributeToSplit? " + attributeToSplit);
+ if (Util.DEBUG) System.out.println("What is the attributeToSplit? " + attributeToSplit);
/* initialize the hashtable */
FactDistribution facts_at_attribute = new FactDistribution(attributeValues, targetValues);
@@ -295,6 +534,25 @@
}
return sum;
}
+ /* you can calculate this before */
+ public static double calc_info(FactTargetDistribution facts_in_class) {
+
+ int total_num_facts = facts_in_class.getSum();
+ Collection<Object> targetValues = facts_in_class.getTargetClasses();
+ double prob, sum = 0;
+ for (Object key : targetValues) {
+ int num_in_class = facts_in_class.getVoteFor(key);
+ // System.out.println("num_in_class : "+ num_in_class + " key "+ key+ " and the total num "+ total_num_facts);
+
+ if (num_in_class > 0) {
+ prob = (double) num_in_class / (double) total_num_facts;
+ /* TODO what if it is a sooo small number ???? */
+ sum += -1 * prob * Util.log2(prob);
+ // System.out.println("prob "+ prob +" and the plog(p)"+plog2p+"where the sum: "+sum);
+ }
+ }
+ return sum;
+ }
private static List<Fact> getSplitPoints(List<Fact> facts, String target) {
List<Fact> splits = new ArrayList<Fact>();
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -51,7 +51,7 @@
internalprocess(attributestoprocess)
*/
- public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+ public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, List<String> workingAttributes) {
DecisionTree dt = new DecisionTree(klass.getName());
// **OPT List<FactSet> facts = new ArrayList<FactSet>();
@@ -94,7 +94,7 @@
}
- public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+ public DecisionTree build(WorkingMemory wm, String klass, String targetField, List<String> workingAttributes) {
DecisionTree dt = new DecisionTree(klass);
// **OPT List<FactSet> facts = new ArrayList<FactSet>();
@@ -165,6 +165,7 @@
//*OPT* return new LeafNode(facts.get(0).getFact(0).getFieldValue(target));
LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
classifiedNode.setRank((double)facts.size()/(double)num_fact_processed);
+ classifiedNode.setNumSupporter(facts.size());
return classifiedNode;
}
@@ -173,6 +174,7 @@
/* an heuristic of the leaf classification*/
LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
noAttributeLeftNode.setRank((double)winner_vote/(double)num_fact_processed);
+ noAttributeLeftNode.setNumSupporter(winner_vote);
return noAttributeLeftNode;
}
@@ -205,7 +207,8 @@
if (filtered_facts.get(value).isEmpty()) {
/* majority !!!! */
LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
- majorityNode.setRank(0.0);
+ majorityNode.setRank(-1.0);
+ majorityNode.setNumSupporter(filtered_facts.get(value).size());
currentNode.addNode(value, majorityNode);
} else {
TreeNode newNode = id3(dt, filtered_facts.get(value), attributeNames_copy);
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -63,12 +63,16 @@
}
public Object readString(String data) {
- System.out.print("What is the data : "+ data);
+ //System.out.print("What is the data : "+ data);
if (isValid(data)) {
if (data.trim().equalsIgnoreCase("true"))
return Boolean.TRUE;
else if ((data.trim().equalsIgnoreCase("false")))
return Boolean.FALSE;
+ else if (data.trim().equalsIgnoreCase("1"))
+ return Boolean.TRUE;
+ else if (data.trim().equalsIgnoreCase("0"))
+ return Boolean.FALSE;
else
return Boolean.parseBoolean(data);
}else
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -6,6 +6,7 @@
@Target({ElementType.METHOD, ElementType.FIELD})
public @interface DomainSpec {
int readingSeq();
+ boolean ignore() default false;
boolean target() default false;
boolean discrete() default true;
String[] values() default {"bok"};
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactDistribution.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -26,6 +26,10 @@
}
+ // BUG NOTE(review): this method unconditionally calls itself —
+ // `return this.clone()` recurses forever and throws StackOverflowError
+ // on first use. It should instead build a new FactDistribution and copy
+ // this instance's state (total_num, facts_at_attr) into it.
+ public FactDistribution clone() {
+ return this.clone();
+ }
+
public void setTotal(int size) {
this.total_num = size;
}
@@ -49,6 +53,11 @@
for (Object target: targetDist.keySet())
facts_at_attr.get(attr_value).put(target,targetDist.get(target));
}
+
+ // Overload of setTargetDistForAttr for the new FactTargetDistribution
+ // type: copies that distribution's per-class vote counts into this
+ // attribute value's bucket, mirroring the Hashtable-based overload above.
+ public void setTargetDistForAttr(Object attr_value, FactTargetDistribution targetDist) {
+ for (Object target: targetDist.getTargetClasses())
+ facts_at_target.get(attr_value).put(target,targetDist.getVoteFor(target));
+ }
public void change(Object attrValue, Object targetValue, int i) {
int num_1 = facts_at_attr.get(attrValue).get(targetValue).intValue();
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -1,6 +1,8 @@
package dt.memory;
import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
@@ -244,9 +246,15 @@
OOFactSet fs = wm.getFactSet(klass);
Collection<Domain<?>> domains = fs.getDomains();
- BufferedReader reader = new BufferedReader(new InputStreamReader(
- klass.getResourceAsStream(filename)));// "../data/"
- // +
+ File file =new File(filename);
+ if(!file.exists()){
+ System.out.println("where is the file ? "+ filename);
+ System.exit(0);
+ }
+ BufferedReader reader;
+
+ reader = new BufferedReader(new FileReader(filename));
+
String line;
while ((line = reader.readLine()) != null) {
// Fact newFact = fromString(line,domains,separator);
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactTargetDistribution.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -0,0 +1,107 @@
package dt.memory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Hashtable;
import java.util.List;

import dt.tools.Util;

/**
 * Histogram of a set of facts over the classes of a target {@link Domain}:
 * for every target class it keeps both the number of facts voting for that
 * class and the facts themselves, plus a running grand total under the
 * pseudo-key {@code Util.sum()}.
 *
 * Typical usage: construct with the target domain, feed facts through
 * {@link #calculateDistribution(List)} (possibly several times; counts
 * accumulate), then call {@link #evaluateMajority()} and read the winner
 * via {@link #getThe_winner_target_class()}.
 *
 * NOTE(review): not thread-safe beyond Hashtable's per-call locking;
 * intended for single-threaded tree building — confirm before sharing.
 */
public class FactTargetDistribution {

	/* pseudo target-class key under which the running total is stored */
	private final String attr_sum = Util.sum();
	private final Domain<?> targetDomain;
	/* target class value -> number of facts in that class (+ attr_sum -> total) */
	private final Hashtable<Object, Integer> num_at_target;
	/* target class value -> the facts that fell into that class */
	private final Hashtable<Object, List<Fact>> facts_at_target;

	/* results of evaluateMajority() */
	private int num_supported_target_classes;
	private Object the_winner_target_class;

	/**
	 * Creates an empty distribution with one zeroed bucket per declared
	 * target-domain value, plus the grand-total bucket.
	 *
	 * @param targetDomain the domain of the target attribute; its declared
	 *                     values define the initial buckets
	 */
	public FactTargetDistribution(Domain<?> targetDomain) {
		this.targetDomain = targetDomain;
		num_supported_target_classes = 0;
		List<?> targetValues = targetDomain.getValues();
		num_at_target = new Hashtable<Object, Integer>(targetValues.size() + 1);
		facts_at_target = new Hashtable<Object, List<Fact>>(targetValues.size());
		for (Object t : targetValues) {
			num_at_target.put(t, Integer.valueOf(0));
			facts_at_target.put(t, new ArrayList<Fact>());
		}
		num_at_target.put(attr_sum, Integer.valueOf(0));
	}

	/**
	 * Tallies the given facts into the distribution. May be called more than
	 * once; per-class counts and the grand total accumulate across calls.
	 *
	 * Bug fix: a fact whose target value was not declared in the domain used
	 * to cause a NullPointerException ({@code num_at_target.get(key)} was
	 * null); such values are now registered lazily instead.
	 *
	 * @param facts the facts to count; each must answer
	 *              {@code getFieldValue(targetDomain.getName())}
	 */
	public void calculateDistribution(List<Fact> facts) {
		int total_num_facts = 0;
		String target = targetDomain.getName();
		for (Fact f : facts) {
			total_num_facts++;
			Object key = f.getFieldValue(target);
			Integer count = num_at_target.get(key);
			if (count == null) {
				// target value unknown to the domain: open a bucket for it
				// rather than crashing (it will NOT take part in
				// evaluateMajority(), which iterates the declared values)
				count = Integer.valueOf(0);
				facts_at_target.put(key, new ArrayList<Fact>());
			}
			num_at_target.put(key, Integer.valueOf(count.intValue() + 1));
			facts_at_target.get(key).add(f);
		}
		num_at_target.put(attr_sum,
				Integer.valueOf(num_at_target.get(attr_sum).intValue() + total_num_facts));
	}

	/**
	 * @return the target classes seen by this distribution (declared domain
	 *         values plus any lazily registered ones); never contains the
	 *         grand-total pseudo-key
	 */
	public Collection<Object> getTargetClasses() {
		return facts_at_target.keySet();
	}

	/** @return the grand total of facts counted so far */
	public int getSum() {
		return num_at_target.get(attr_sum).intValue();
	}

	/**
	 * @param value a target class value
	 * @return the number of facts counted for that class, or 0 for a value
	 *         never seen (used to throw NullPointerException)
	 */
	public int getVoteFor(Object value) {
		Integer count = num_at_target.get(value);
		return (count == null) ? 0 : count.intValue();
	}

	/**
	 * @param value a target class value
	 * @return the facts counted for that class, or {@code null} for a value
	 *         never seen
	 */
	public List<Fact> getSupportersFor(Object value) {
		return facts_at_target.get(value);
	}

	/**
	 * Scans the declared target-domain values and records (a) how many of
	 * them have at least one supporting fact and (b) the value with the most
	 * supporters. Results are exposed through
	 * {@link #getNum_supported_target_classes()} and
	 * {@link #getThe_winner_target_class()}.
	 *
	 * If no facts have been counted, the winner remains {@code null} —
	 * callers must be prepared for that.
	 */
	public void evaluateMajority() {
		List<?> targetValues = targetDomain.getValues();
		int winner_vote = 0;
		int num_supporters = 0;
		Object winner = null;
		for (Object key : targetValues) {
			int num_in_class = getVoteFor(key);
			if (num_in_class > 0)
				num_supporters++;
			if (num_in_class > winner_vote) {
				winner_vote = num_in_class;
				winner = key;
			}
		}
		setNum_supperted_target_classes(num_supporters);
		setThe_winner_target_class(winner);
	}

	/** @return number of declared classes with at least one supporter */
	public int getNum_supported_target_classes() {
		return num_supported_target_classes;
	}

	/* name kept (including its misspelling) — it is part of the public API */
	public void setNum_supperted_target_classes(int num_supperted_target_classes) {
		this.num_supported_target_classes = num_supperted_target_classes;
	}

	/** @return majority class found by evaluateMajority(), or null if none */
	public Object getThe_winner_target_class() {
		return the_winner_target_class;
	}

	public void setThe_winner_target_class(Object the_winner_target_class) {
		this.the_winner_target_class = the_winner_target_class;
	}

}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FactProcessor.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -2,14 +2,13 @@
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
-import java.util.ListIterator;
import dt.memory.Domain;
import dt.memory.Fact;
+import dt.memory.FactTargetDistribution;
public class FactProcessor {
@@ -22,8 +21,7 @@
return FactProcessor.splitFacts_cont(facts, choosenDomain);
}
}
- public static Hashtable<Object, List<Fact>> splitFacts_disc(
- List<Fact> facts, Domain<?> choosenDomain) {
+ public static Hashtable<Object, List<Fact>> splitFacts_disc(List<Fact> facts, Domain<?> choosenDomain) {
String attributeName = choosenDomain.getName();
List<?> attributeValues = choosenDomain.getValues();
Hashtable<Object, List<Fact>> factLists = new Hashtable<Object, List<Fact>>(attributeValues.size());
@@ -42,20 +40,22 @@
String attributeName = attributeDomain.getName();
- System.out.println("FactProcessor.splitFacts_cont() attr_split "+ attributeName);
+ if (Util.DEBUG) System.out.println("FactProcessor.splitFacts_cont() attr_split "+ attributeName);
List<?> categorization = attributeDomain.getValues();
List<Integer> split_indices = attributeDomain.getIndices();
- System.out.println("FactProcessor.splitFacts_cont() haniymis benim repsentativelerim: "+ categorization.size() + " and the split points "+ split_indices.size());
-
- System.out.println("FactProcessor.splitFacts_cont() before splitting "+ facts.size());
- int split_i =0;
- for(int i=0; i<facts.size(); i++) {
- if (split_i<split_indices.size() && split_indices.get(split_i).intValue()== i) {
- System.out.println("PRINT*: FactProcessor.splitFacts_cont() will split at "+i + " the fact "+facts.get(i));
- split_i ++;
- } else {
- System.out.println("PRINT: FactProcessor.splitFacts_cont() at "+i + " the fact "+facts.get(i));
+ if (Util.DEBUG) {
+ System.out.println("FactProcessor.splitFacts_cont() haniymis benim repsentativelerim: "+ categorization.size() + " and the split points "+ split_indices.size());
+
+ System.out.println("FactProcessor.splitFacts_cont() before splitting "+ facts.size());
+ int split_i =0;
+ for(int i=0; i<facts.size(); i++) {
+ if (split_i<split_indices.size() && split_indices.get(split_i).intValue()== i) {
+ System.out.println("PRINT*: FactProcessor.splitFacts_cont() will split at "+i + " the fact "+facts.get(i));
+ split_i ++;
+ } else {
+ System.out.println("PRINT: FactProcessor.splitFacts_cont() at "+i + " the fact "+facts.get(i));
+ }
}
}
@@ -68,16 +68,23 @@
Iterator<Integer> splits_it = split_indices.iterator();
int start_point = 0;
int index = 0;
- while (splits_it.hasNext()) {
- int integer_index = splits_it.next().intValue();
+
+ while (splits_it.hasNext() || index < attributeDomain.getValues().size()) {
+ int integer_index;
+ if (splits_it.hasNext())
+ integer_index = splits_it.next().intValue();
+ else
+ integer_index = facts.size();
+
Object category = attributeDomain.getValues().get(index);
//System.out.println("FactProcessor.splitFacts_cont() new category: "+ category);
Fact pseudo = new Fact();
try {
pseudo.add(attributeDomain, category);
-
- System.out.println("FactProcessor.splitFacts_cont() new category: "+ category );
- System.out.println(" ("+start_point+","+integer_index+")");
+ if (Util.DEBUG) {
+ System.out.println("FactProcessor.splitFacts_cont() new category: "+ category );
+ System.out.println(" ("+start_point+","+integer_index+")");
+ }
factLists.put(category, facts.subList(start_point, integer_index));
start_point = integer_index;
@@ -88,53 +95,31 @@
index++;
}
+
+
return factLists;
}
- /* it must work */
- private static Hashtable<Object, List<Fact>> splitFacts_cont_(
- List<Fact> facts, Domain<?> attributeDomain) {
+ public static void splitUnclassifiedFacts(
+ List<Fact> unclassified_facts, FactTargetDistribution stats) {
- String attributeName = attributeDomain.getName();
-
- System.out.println("FactProcessor.splitFacts_cont() kimi diziyoruz: "+ attributeName);
-
- List<?> categorization = attributeDomain.getValues();
- System.out.println("FactProcessor.splitFacts_cont() haniymis benim repsentativelerim: "+ categorization.size());
-
- Hashtable<Object, List<Fact>> factLists = new Hashtable<Object, List<Fact>>(categorization.size());
- for (Object v: attributeDomain.getValues()) {
- factLists.put(v, new ArrayList<Fact>());
- }
-
- Comparator<Fact> cont_comp = attributeDomain.factComparator();
- Iterator<?> category_it = attributeDomain.getValues().iterator();
- int start_point = 0;
- while (category_it.hasNext()) {
- Object category = category_it.next();
- System.out.println("FactProcessor.splitFacts_cont() new category: "+ category);
- Fact pseudo = new Fact();
- try {
- pseudo.add(attributeDomain, category);
- int insertion_point_1 = Collections.binarySearch(facts, pseudo, cont_comp);
- if (insertion_point_1 < 0)
- factLists.put(category, facts.subList(start_point, -1*insertion_point_1));
- else {
-
- System.out.println("FactProcessor.splitFacts_cont() last category: "+
- category + " the point "+-1*insertion_point_1 + " the size "+ facts.size());
- factLists.put(category, facts.subList(start_point, insertion_point_1));
- break;
- }
- start_point = -1* insertion_point_1;
-
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
+ Object winner = stats.getThe_winner_target_class();
+ System.out.println(Util.ntimes("DANIEL", 2)+ " lets get unclassified daniel winner "+winner +" num of sup " +stats.getVoteFor(winner));
+ for (Object looser: stats.getTargetClasses()) {
+ int num_supp = stats.getVoteFor(looser);
+ if ((num_supp > 0) && !winner.equals(looser)) {
+
+ System.out.println(Util.ntimes("DANIEL", 2)+ " one looser ? "+looser + " num of sup="+num_supp);
+ //System.out.println(" the num of supporters = "+ stats.getVoteFor(looser));
+ //System.out.println(" but the guys "+ stats.getSupportersFor(looser));
+ //System.out.println("How many bok: "+stats.getSupportersFor(looser).size());
+ unclassified_facts.addAll(stats.getSupportersFor(looser));
+ } else
+ System.out.println(Util.ntimes("DANIEL", 5)+ "how many times matching?? not a looser "+ looser );
}
- return factLists;
+
+ @SuppressWarnings("unused")
+ int bok = 1;
}
-
}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -23,7 +23,8 @@
System.out.println("Time" + dt + "\n" + bocuksTree);
RulePrinter my_printer = new RulePrinter();
- my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile);
+ boolean sort_via_rank = true;
+ my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile, sort_via_rank);
return obj_read;
@@ -45,12 +46,15 @@
long dt = System.currentTimeMillis();
String target_attr = ObjectReader.getTargetAnnotation(emptyObject.getClass());
- DecisionTree bocuksTree = bocuk.build(simple, emptyObject.getClass().getName(), target_attr, null);
+ List<String> workingAttributes= ObjectReader.getWorkingAttributes(emptyObject.getClass());
+
+ DecisionTree bocuksTree = bocuk.build(simple, emptyObject.getClass().getName(), target_attr, workingAttributes);
dt = System.currentTimeMillis() - dt;
System.out.println("Time" + dt + "\n" + bocuksTree);
- RulePrinter my_printer = new RulePrinter();
- my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile);
+ RulePrinter my_printer = new RulePrinter(bocuk.getNum_fact_processed());
+ boolean sort_via_rank = true;
+ my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile, sort_via_rank);
return obj_read;
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -6,6 +6,7 @@
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
@@ -178,6 +179,29 @@
return null;
}
+ // Returns the names of the class's declared simple-typed fields that carry
+ // a @DomainSpec annotation with ignore() == false, i.e. the attributes the
+ // decision-tree builder should actually work on.
+ public static List<String> getWorkingAttributes(Class<? extends Object> classObj) {
+ Field [] element_fields = classObj.getDeclaredFields();
+ ArrayList<String> attributes = new ArrayList<String>(element_fields.length) ;
+ for( Field f: element_fields) {
+ String f_name = f.getName();
+ Class<?>[] f_class = {f.getType()};
+ if (Util.isSimpleType(f_class)) {
+ Annotation[] annotations = f.getAnnotations();
+
+ // iterate over the annotations to locate the DomainSpec annotation if it exists
+ // (was: "MaxLength constraint" — stale copy/paste)
+ DomainSpec spec = null;
+ for (Annotation a : annotations) {
+ if (a instanceof DomainSpec) {
+ spec = (DomainSpec)a; // here it is !!!
+ if (!spec.ignore())
+ attributes.add(f_name);
+ }
+ }
+ }
+ }
+ return attributes;
+ }
+
//read(Class<?> element_class, Collection<Domain<?>> collection, String data, String separator)
public static Object read_(Class<?> element_class, Collection<Domain<?>> domains, String data, String separator) {
@@ -455,7 +479,4 @@
throw new IOException("field assignment failure:" + e);
}
}
-
-
-
}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -22,8 +22,23 @@
private Stack<NodeValue> nodes;
private Object ruleObject;
+
+ private boolean ONLY_ACTIVE = true;
+ private int num_facts;
//private RuleComparator rule_comp = new RuleComparator();
+
+ public RulePrinter(int num_facts) {
+ ruleText = new ArrayList<String>();
+ //rule_list = new ArrayList<ArrayList<NodeValue>>();
+ rules = new ArrayList<Rule>();
+
+ /* most important */
+ nodes = new Stack<NodeValue>();
+
+ this.num_facts = num_facts;
+ }
+
public RulePrinter() {
ruleText = new ArrayList<String>();
//rule_list = new ArrayList<ArrayList<NodeValue>>();
@@ -33,17 +48,10 @@
nodes = new Stack<NodeValue>();
}
- public void printer(DecisionTree dt, String packageName, String outputFile) {//, PrintStream object
+ public void printer(DecisionTree dt, String packageName, String outputFile, boolean sort) {//, PrintStream object
ruleObject = dt.getName();
dfs(dt.getRoot());
-
-// int j = 0;
-// for( String rule: ruleText) {
-// j++;
-// System.out.println("Rule " +j + " suggests that \n"+ rule +".\n");
-// }
-
- //String outputFile = new String("src/id3/rules"+".drl");
+
if (outputFile!=null) {
if (packageName != null)
write("package " + packageName +";\n\n", false, outputFile);
@@ -54,21 +62,37 @@
// TODO Auto-generated catch block
e.printStackTrace();
}
-// write("/* \n", false, outputFile);
-// write(" * Spitting the rules= \n", true, outputFile);
-// write(" */ \n", true, outputFile);
}
+ if (sort)
+ Collections.sort(rules, Rule.getRankComparator());
+
+ int total_num_facts=0;
int i = 0;
- //Collections.sort(rules, Rule.getRankComparator());
for( Rule rule: rules) {
i++;
- System.out.println("//rule " +i + " write to drl \n"+ rule +"\n");
- if (outputFile!=null) {
- write(rule.toString(), true, outputFile);
- write("\n", true, outputFile);
+ if (ONLY_ACTIVE) {
+ if (rule.getRank() >= 0) {
+ System.out.println("//Active rules " +i + " write to drl \n"+ rule +"\n");
+ if (outputFile!=null) {
+ write(rule.toString(), true, outputFile);
+ write("\n", true, outputFile);
+ }
+ }
+
+ } else {
+ System.out.println("//rule " +i + " write to drl \n"+ rule +"\n");
+ if (outputFile!=null) {
+ write(rule.toString(), true, outputFile);
+ write("\n", true, outputFile);
+ }
}
+ total_num_facts += rule.getPopularity();
}
+ if (outputFile!=null) {
+ write("//THE END: Total number of facts correctly classified= "+ total_num_facts, true, outputFile);
+ write("\n", true, outputFile); // EOF
+ }
}
public Object getRuleObject() {
return ruleObject;
@@ -195,6 +219,7 @@
private int id;
private String attr_obj;
private double rank;
+ private double popularity;
private ArrayList<NodeValue> conditions;
private ArrayList<NodeValue> actions;
@@ -213,6 +238,7 @@
public void addAction(NodeValue current) {
actions.add(new NodeValue(current.getNode(), current.getNodeValue()));
rank = ((LeafNode)current.getNode()).getRank();
+ popularity = ((LeafNode)current.getNode()).getNum_facts_classified();
}
public void setObject(String obj) {
attr_obj= obj;
@@ -231,6 +257,13 @@
this.id= id;
}
+ public double getPopularity() {
+ return popularity;
+ }
+
+ public void setPopularity(double popularity) {
+ this.popularity = popularity;
+ }
public String toString() {
@@ -243,8 +276,9 @@
System.out.println( "Goodbye: " + message );
end
*/
-
+
String out = ""; //"rule \"#"+getId()+" "+decision+" rank:"+rank+"\" \n";
+
out += "\t when";
out += "\n\t\t "+getObject() +"("+ "";
for (NodeValue cond: conditions) {
@@ -263,9 +297,10 @@
out += "\t then ";
out += "\n\t\t System.out.println(\"Decision on "+decision+"= \"+" + decision + "+\": ("+action+")\");\n";
+ if (getRank() <0)
+ out += "\n\t\t System.out.println(\"But no matching fact found = DOES not fire on\");\n";
+ out = "rule \"#"+getId()+" "+decision+ "= "+action+" classifying "+getPopularity()+" num of facts with rank:"+getRank() +"\" \n" + out;
- out = "rule \"#"+getId()+" "+decision+ "= "+action+" with rank:"+rank+"\" \n" + out;
-
out += "end\n";
return out;
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -7,6 +7,8 @@
public class Util {
+ public static boolean DEBUG = false;
+
public static String ntimes(String s,int n){
StringBuffer buf = new StringBuffer();
for (int i = 0; i < n; i++) {
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukFileExample.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -40,7 +40,8 @@
//System.out.println(bocuksTree);
RulePrinter my_printer = new RulePrinter();
- my_printer.printer(bocuksTree, null, null);
+ boolean sort_via_rank = true;
+ my_printer.printer(bocuksTree, null, null, sort_via_rank);
}
}
Modified: labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java 2008-04-01 22:59:21 UTC (rev 19370)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/test/BocukObjectExample.java 2008-04-01 23:53:50 UTC (rev 19371)
@@ -46,6 +46,7 @@
System.out.println("Time"+dt+"\n"+bocuksTree);
RulePrinter my_printer = new RulePrinter();
- my_printer.printer(bocuksTree,"test" , new String("../dt_learning/src/test/rules"+".drl"));
+ boolean sort_via_rank = true;
+ my_printer.printer(bocuksTree,"test" , new String("../dt_learning/src/test/rules"+".drl"), sort_via_rank);
}
}
More information about the jboss-svn-commits
mailing list