[jboss-svn-commits] JBL Code SVN: r19045 - in labs/jbossrules/contrib/machinelearning/decisiontree: src and 1 other directories.
jboss-svn-commits at lists.jboss.org
jboss-svn-commits at lists.jboss.org
Mon Mar 17 19:56:56 EDT 2008
Author: gizil
Date: 2008-03-17 19:56:55 -0400 (Mon, 17 Mar 2008)
New Revision: 19045
Added:
labs/jbossrules/contrib/machinelearning/decisiontree/src/
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukFileExample.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukObjectExample.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BooleanDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DBFactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTree.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilderMT.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Domain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DomainFactory.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FSFactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Fact.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSetFactory.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LeafNode.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LiteralDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/NumericDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/OOFactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Restaurant.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/RulePrinter.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/TreeNode.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Util.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/WorkingMemory.java
Log:
first commit, id3 without discretization
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukFileExample.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukFileExample.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukFileExample.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,44 @@
+package id3;
+
+
+public class BocukFileExample {
+
+    /**
+     * Demo entry point: loads the nursery data set into a fresh working
+     * memory, learns an ID3 tree for the "classnursery" target, then prints
+     * the timing statistics and hands the tree to RulePrinter.
+     * <p>
+     * To try another data set, swap in the matching FactSetFactory insert
+     * call (for example insertCarSet) and pass its target attribute name
+     * to build(...).
+     *
+     * @param args command-line arguments (unused)
+     */
+    public static void main(String[] args) {
+        WorkingMemory memory = new WorkingMemory();
+
+        // Load the training facts; the returned name selects the fact set.
+        String nurseryClass = FactSetFactory.insertNurserySet(memory);
+
+        // Single-threaded builder; DecisionTreeBuilderMT is the threaded
+        // alternative.
+        DecisionTreeBuilder builder = new DecisionTreeBuilder();
+
+        // Time only the learning step; null means "use every attribute".
+        long startedAt = System.currentTimeMillis();
+        DecisionTree tree = builder.build(memory, nurseryClass, "classnursery", null);
+        long elapsed = System.currentTimeMillis() - startedAt;
+
+        System.out.println("Time" + elapsed
+                + " facts read: " + tree.getNumRead()
+                + " num call: " + builder.getNumCall());
+
+        // Hand the learned tree to the rule printer.
+        RulePrinter rulePrinter = new RulePrinter();
+        rulePrinter.printer(tree);
+    }
+
+
+
+}
+
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukObjectExample.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukObjectExample.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BocukObjectExample.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,45 @@
+package id3;
+
+import java.util.ArrayList;
+
+public class BocukObjectExample {
+
+    /** Demo entry point: learns an ID3 tree for "will_wait" from eleven sample Restaurant facts and prints the result. */
+    public static void main(String[] args) {
+        // This instance only supplies the fact class handed to the builder.
+        Restaurant probe = new Restaurant(true, false, false, true, "Full", 1, false, false, "Thai", "30-60", false);
+        Class<?> factClass = probe.getClass();
+
+        ArrayList<Object> samples = new ArrayList<Object>();
+        samples.add(new Restaurant(true, false, false, true, "Full", 1, false, false, "Thai", "30-60", false));
+        samples.add(new Restaurant(false, true, false, false, "Some", 1, false, false, "Burger", "0-10", true));
+        samples.add(new Restaurant(true, false, true, true, "Full", 1, true, false, "Thai", "10-30", true));
+        samples.add(new Restaurant(true, false, true, false, "Full", 3, false, true, "French", ">60", false));
+        samples.add(new Restaurant(false, true, false, true, "Some", 2, true, true, "Italian", "0-10", true));
+        samples.add(new Restaurant(false, true, false, false, "None", 1, true, false, "Burger", "0-10", false));
+        samples.add(new Restaurant(false, false, false, true, "Some", 2, true, true, "Thai", "0-10", true));
+        samples.add(new Restaurant(false, true, true, false, "Full", 1, true, false, "Burger", ">60", false));
+        samples.add(new Restaurant(true, true, true, true, "Full", 3, false, true, "Italian", "10-30", false));
+        samples.add(new Restaurant(false, false, false, false, "None", 1, false, false, "Thai", "0-10", false));
+        samples.add(new Restaurant(true, true, true, true, "Full", 1, false, false, "Burger", "30-60", true));
+
+        WorkingMemory memory = new WorkingMemory();
+        for (Object sample : samples) {
+            try {
+                memory.insert(sample);
+            } catch (Exception e) {
+                // Best effort: report the rejected fact and keep loading.
+                System.out.println("Inserting element " + sample + " and " + e);
+            }
+        }
+
+        DecisionTreeBuilder builder = new DecisionTreeBuilder();
+        long startedAt = System.currentTimeMillis();
+        DecisionTree tree = builder.build(memory, factClass, "will_wait", null);
+        long elapsed = System.currentTimeMillis() - startedAt;
+        System.out.println("Time" + elapsed + "\n" + tree);
+
+        RulePrinter rulePrinter = new RulePrinter();
+        rulePrinter.printer(tree);
+    }
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BooleanDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BooleanDomain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/BooleanDomain.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,82 @@
+package id3;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class BooleanDomain implements Domain<Boolean> {
+
+    private String fName;
+    private ArrayList<Boolean> fValues;
+    private boolean constant;
+
+    /** Creates a domain named _name (trimmed) whose values are TRUE and FALSE, in that order. */
+    public BooleanDomain(String _name) {
+        fName = _name.trim();
+        fValues = new ArrayList<Boolean>();
+        fValues.add(Boolean.TRUE);
+        fValues.add(Boolean.FALSE);
+    }
+
+    /** A boolean attribute is always discrete. */
+    public boolean isDiscrete() {
+        return true;
+    }
+
+    public String getName() {
+        return fName;
+    }
+
+    /** Always returns true, whatever the argument. */
+    public boolean contains(Boolean value) {
+        return true;
+    }
+
+    /** No-op: both values are registered by the constructor. */
+    public void addValue(Boolean value) {
+    }
+
+    /** Returns the internal (mutable) value list. */
+    public List<Boolean> getValues() {
+        return fValues;
+    }
+
+    /** Hash of the attribute name. Note that equals() is not overridden. */
+    public int hashCode() {
+        return fName.hashCode();
+    }
+
+    public boolean isConstant() {
+        return this.constant;
+    }
+
+    public void setConstant() {
+        this.constant = true;
+    }
+
+    /**
+     * Parses a boolean literal.
+     *
+     * @return the parsed Boolean, or null when data fails isValid
+     */
+    public Object readString(String data) {
+        return isValid(data) ? Boolean.valueOf(Boolean.parseBoolean(data)) : null;
+    }
+
+    /**
+     * Accepts only "true" or "false", ignoring case (null is rejected).
+     * Boolean.parseBoolean never throws and maps every other input to
+     * false, so it cannot be used to detect invalid data.
+     */
+    public boolean isValid(String string) {
+        return "true".equalsIgnoreCase(string) || "false".equalsIgnoreCase(string);
+    }
+
+    /** True when value is a Boolean contained in the value list. */
+    public boolean isPossible(Object value) {
+        return value instanceof Boolean && fValues.contains(value);
+    }
+
+    public String toString() {
+        return fName;
+    }
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DBFactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DBFactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DBFactSet.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,32 @@
+package id3;
+
+import java.util.Collection;
+
+public class DBFactSet implements FactSet{
+ // Placeholder FactSet implementation: every method below is still an unimplemented stub.
+ public void assignTo(Collection<Fact> c) {
+ // TODO not implemented: nothing is added to c
+
+ }
+ /** Stub: always returns null. */
+ public String getClassName() {
+ // TODO not implemented
+ return null;
+ }
+ /** Stub: always returns null, whatever attr is. */
+ public Domain<?> getDomain(String attr) {
+ // TODO not implemented
+ return null;
+ }
+ /** Stub: always returns null. NOTE(review): code that iterates the result will fail with a NullPointerException. */
+ public Collection<Domain<?>> getDomains() {
+ // TODO not implemented
+ return null;
+ }
+ /** Stub: always reports zero facts. */
+ public int getSize() {
+ // TODO not implemented
+ return 0;
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTree.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTree.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTree.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,235 @@
+package id3;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+
+public class DecisionTree {
+
+ public long FACTS_READ = 0;
+
+ /* set of the attributes, their types*/
+ private Hashtable<String, Domain<?>> domainSet;
+
+ /* the class of the objects */
+ private String className;
+
+ /* the target attribute */
+ private String target;
+
+
+ private TreeNode root;
+
+ /* all attributes that can be used during classification */
+ private ArrayList<String> attrsToClassify;
+
+ DecisionTree(String klass) { // package-private; starts with no domains, no target and no root
+ this.className = klass; // returned unchanged by getName()
+ this.domainSet = new Hashtable<String, Domain<?>>(); // filled by addDomain, keyed by domain name
+ this.attrsToClassify = new ArrayList<String>(); // names of the non-target attributes
+ }
+
+
+ /** Majority vote: returns the target value carried by the most facts (the earliest value wins ties; null if every count is zero). */
+ private Object getConsensus(List<Fact> facts) {
+     List<?> candidates = getPossibleValues(this.target);
+     Hashtable<Object, Integer> tally = getStatistics(facts, target, candidates);
+     Object best = null;
+     int bestCount = 0;
+     for (int i = 0; i < candidates.size(); i++) {
+         Object candidate = candidates.get(i);
+         int count = tally.get(candidate).intValue();
+         if (count > bestCount) {
+             bestCount = count;
+             best = candidate;
+         }
+     }
+     return best;
+ }
+
+
+ /** Information gain of splitting facts on attributeName: getInformation(facts) minus the remainder returned by getGain(facts, attributeName). */
+ public double calculateGain(List<Fact> facts, String attributeName) {
+ return getInformation(facts) - getGain(facts, attributeName);
+ }
+
+ /** Despite its name, returns the entropy left AFTER splitting on attributeToSplit: for each attribute value, the entropy of the target among the facts with that value, weighted by that value's share of the facts. Prints a debug line and adds facts.size() to FACTS_READ. */
+ public double getGain(List<Fact> facts, String attributeToSplit) {
+ System.out.println("What is the attributeToSplit? "+attributeToSplit);
+ List<?> attributeValues = getPossibleValues(attributeToSplit);
+
+ String attr_sum = "sum"; // key of the per-value fact total inside each inner table; NOTE(review): would clash with a target value equal to "sum"
+
+ List<?> targetValues = getPossibleValues(target);
+ // facts_of_attribute maps: attribute value -> (target value -> count, plus the "sum" total)
+
+ /* initialize every counter to zero */
+ Hashtable<Object, Hashtable<Object, Integer>> facts_of_attribute = new Hashtable<Object, Hashtable<Object, Integer>>(attributeValues.size());
+ for (Object attr: attributeValues) {
+ facts_of_attribute.put(attr, new Hashtable<Object, Integer>(targetValues.size()+1));
+ for (Object t: targetValues) {
+ facts_of_attribute.get(attr).put(t, 0);
+ }
+ facts_of_attribute.get(attr).put(attr_sum, 0);
+ }
+
+
+ int total_num_facts= 0;
+ // Single pass over the facts: bump the (attribute value, target value) counter and the per-value total.
+ // NOTE(review): a fact whose attribute or target value is missing from its domain makes the lookups below throw NullPointerException.
+ for (Fact f: facts) {
+ total_num_facts ++;
+ Object targetKey = f.getFieldValue(target);
+ //System.out.println("My key: "+ targetKey.toString());
+
+ Object attr_key = f.getFieldValue(attributeToSplit);
+ int num = facts_of_attribute.get(attr_key).get(targetKey).intValue();
+ num ++;
+ facts_of_attribute.get(attr_key).put(targetKey, num);
+
+ int total_num = facts_of_attribute.get(attr_key).get(attr_sum).intValue();
+ total_num ++;
+ facts_of_attribute.get(attr_key).put(attr_sum, total_num);
+
+// System.out.println("getGain of "+attributeToSplit+
+// ": total_num "+ facts_of_attribute.get(attr_key).get(attr_sum) +
+// " and "+facts_of_attribute.get(attr_key).get(targetKey) +
+// " at attr=" + attr_key + " of t:"+targetKey);
+ }
+ FACTS_READ += facts.size(); // bookkeeping: one more scan of the fact list
+//*OPT* }
+//*OPT* }
+
+ double sum = 0.0;
+ for (Object attr: attributeValues) {
+ int total_num_attr = facts_of_attribute.get(attr).get(attr_sum).intValue();
+
+ double sum_attr = 0.0; // entropy of the target within this attribute value
+ if (total_num_attr > 0) // guards only the inner loop (no braces); the weighted add below always runs
+ for (Object t: targetValues) {
+ int num_attr_target = facts_of_attribute.get(attr).get(t).intValue();
+
+ double prob = (double)num_attr_target/total_num_attr;
+ //System.out.println("prob "+ prob);
+ sum_attr += (prob == 0.0) ? 0.0 : (-1* prob * Util.log2(prob));
+ }
+ sum += ((double)total_num_attr/(double)total_num_facts) * sum_attr; // weight by this value's share of the facts
+ }
+ return sum;
+ }
+
+ /**
+  * Counts how many facts carry each value of targetValues in field target.
+  * Every listed value gets an entry (zero when unseen). Also adds
+  * facts.size() to FACTS_READ.
+  */
+ Hashtable<Object, Integer> getStatistics(List<Fact> facts, String target, List<?> targetValues) {
+     Hashtable<Object, Integer> counts = new Hashtable<Object, Integer>(targetValues.size());
+     // Seed every known value with zero.
+     for (int i = 0; i < targetValues.size(); i++) {
+         counts.put(targetValues.get(i), 0);
+     }
+
+     for (Fact fact : facts) {
+         Object value = fact.getFieldValue(target);
+         // A value missing from targetValues has no entry, so the
+         // lookup below fails with a NullPointerException.
+         Integer seen = counts.get(value);
+         counts.put(value, seen.intValue() + 1);
+     }
+     FACTS_READ += facts.size();
+     return counts;
+ }
+
+
+ /**
+  * Entropy of the target attribute over facts: the sum of
+  * -p * Util.log2(p) over every target value, where p is the fraction of
+  * facts carrying that value. Values with p == 0 contribute nothing.
+  *
+  * @param facts list of facts
+  * @return the entropy; with an empty fact list p is 0/0, so the result is not a meaningful number
+  */
+ public double getInformation(List<Fact> facts) {
+     List<?> classes = getPossibleValues(this.target);
+     Hashtable<Object, Integer> counts = getStatistics(facts, target, classes);
+
+     double total = (double) facts.size();
+     double entropy = 0;
+     for (Object cls : classes) {
+         double p = (double) counts.get(cls).intValue() / total;
+         // p == 0 is skipped: it adds nothing to the sum
+         if (p != 0.0) {
+             entropy += -1 * p * Util.log2(p);
+         }
+     }
+
+     return entropy;
+ }
+
+ public void setTarget(String targetField) { // records the target attribute and removes it from the split candidates
+ target = targetField;
+ attrsToClassify.remove(target);
+ }
+ /** Registers domain under its name; unless that name equals the current target, it also becomes a split candidate. */
+ public void addDomain(Domain<?> domain) {
+ domainSet.put(domain.getName(), domain);
+ if (!domain.getName().equals(this.target))
+ attrsToClassify.add(domain.getName());
+
+ }
+ /** Values of the domain registered under fieldName; throws NullPointerException when no such domain was added. */
+ public List<?> getPossibleValues(String fieldName) {
+ return domainSet.get(fieldName).getValues();
+ }
+ /** Names of the attributes available for splitting (the internal list, not a copy). */
+ public List<String> getAttributes() {
+ return attrsToClassify;
+ }
+ /** Name of the target attribute, or null until setTarget is called. */
+ public String getTarget() {
+ return target;
+ }
+ /** The class name passed to the constructor. */
+ public String getName() {
+ return className;
+ }
+
+ /** The domain registered under key, or null if there is none. */
+ public Domain<?> getDomain(String key) {
+ return domainSet.get(key);
+ }
+
+ /** Root node of the tree, or null until setRoot is called. */
+ public TreeNode getRoot() {
+ return(root);
+
+ }
+ /** Installs root as the root node of this tree. */
+ public void setRoot(TreeNode root) {
+ this.root = root;
+
+ }
+ /** Current value of the FACTS_READ counter. */
+ public long getNumRead() {
+ return FACTS_READ;
+ }
+ @Override
+ public String toString() { // concatenating root directly prints "null" for a tree without a root instead of throwing
+     return "Facts scanned " + FACTS_READ + "\n" + root;
+ }
+
+
+ /* **OPT
+ int getTotalSize(List<FactSet> facts) {
+
+ int num = 0;
+ for(FactSet fs : facts) {
+ num += fs.getSize();
+ }
+
+ return num;
+ }
+ */
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilder.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilder.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,248 @@
+package id3;
+
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.List;
+
+public class DecisionTreeBuilder {
+
+ class MyThread extends Thread { // worker that runs one id3 sub-problem; nothing in this class ever instantiates it
+ DecisionTreeBuilder builder; // builder whose id3 is invoked
+ DecisionTree dt; // tree being learned
+ List<Fact> facts; // facts of the sub-problem
+ List<String> attributeNames; // attributes still available for splitting
+ TreeNode currentNode = null; // parent node that receives the result
+ Object value = null; // branch value under which the result is attached
+ TreeNode result = null; // subtree produced by run()
+ @Override
+ public void run() {
+ result = builder.id3(dt, facts, attributeNames);
+ currentNode.addNode(value, result);
+ }
+ }
+
+ MyThread helper;
+ private int FUNC_CALL = 0;
+ private int num_fact_processed = 0;
+
+ /*
+ * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+ */
+
+ public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass.getName());
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ for (FactSet fs: wm.getFactsets()) {
+ if (fs instanceof OOFactSet) {
+ if (klass.isAssignableFrom(((OOFactSet) fs).getFactClass())) {
+// **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts
+ }
+ }
+ if (klass.getName() == fs.getClassName()) {
+ klass_fs = fs;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ TreeNode root = id3(dt, facts, attrs);
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+
+ public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass);
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ for (FactSet fs: wm.getFactsets()) {
+ if (klass == fs.getClassName()) {
+// **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts"
+
+ klass_fs = fs;
+ break;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ System.out.println("Bok degil "+ attr);
+ if (attr =="aratio") {
+ System.out.println("Bok");
+ System.exit(0);
+ }
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ TreeNode root = id3(dt, facts, attrs);
+ dt.setRoot(root);
+
+ return dt;
+ }
+ /** Recursive ID3 step. Returns a leaf when all facts share one target value or when no attribute is left; otherwise splits on the attribute
+  *  chosen by attributeWithGreatestGain and recurses into every non-empty subset. Throws RuntimeException when facts is empty. */
+ private TreeNode id3(DecisionTree dt, List<Fact> facts, List<String> attributeNames) {
+ // count this invocation (the total is reported by getNumCall)
+ FUNC_CALL ++;
+ if (facts.size() == 0) {
+ throw new RuntimeException("Nothing to classify, factlist is empty");
+ }
+ /* tally how many facts fall into each target value */
+ List<?> targetValues = dt.getPossibleValues(dt.getTarget());
+ Hashtable<Object, Integer> stats = dt.getStatistics(facts, dt.getTarget(), targetValues);
+
+ int winner_vote = 0;
+ int num_supporters = 0;
+ Object winner = null;
+ for (Object key: targetValues) {
+
+ int num_in_class = stats.get(key).intValue();
+ if (num_in_class>0) // num_supporters counts the target values that occur at least once
+ num_supporters ++;
+ if (num_in_class > winner_vote) { // winner is the most frequent target value (the first one wins ties)
+ winner_vote = num_in_class;
+ winner = key;
+ }
+ }
+
+ /* if all elements are classified to the same value */
+ if (num_supporters == 1) {
+ // rank = share of all facts handed to build() that reach this leaf
+ LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+ classifiedNode.setRank((double)facts.size()/(double)num_fact_processed);
+ return classifiedNode;
+ }
+
+ /* if there is no attribute left in order to continue */
+ if (attributeNames.size() == 0) {
+ /* majority vote; rank = share of all facts handed to build() that voted for the winner */
+ LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+ noAttributeLeftNode.setRank((double)winner_vote/(double)num_fact_processed);
+ return noAttributeLeftNode;
+ }
+
+ /* id3 starts */
+ String chosenAttribute = attributeWithGreatestGain(dt, facts, attributeNames);
+
+ System.out.println(Util.ntimes("*", 20)+" 1st best attr: "+ chosenAttribute);
+
+ TreeNode currentNode = new TreeNode(dt.getDomain(chosenAttribute));
+ // one child is attached to currentNode for every possible value of the chosen attribute
+ // 'winner' (the majority computed above) labels the children that receive no facts
+
+ List<?> attributeValues = dt.getPossibleValues(chosenAttribute);
+ Hashtable<Object, List<Fact> > filtered_facts = splitFacts(facts, chosenAttribute, attributeValues);
+ dt.FACTS_READ += facts.size();
+
+
+// if (FUNC_CALL ==5) {
+// System.out.println("FUNC_CALL:" +FUNC_CALL);
+// System.exit(0);
+// }
+ for (int i = 0; i < attributeValues.size(); i++) {
+ /* each child gets its own copy of the attribute list, minus the attribute just used */
+ Object value = attributeValues.get(i);
+
+ ArrayList<String> attributeNames_copy = new ArrayList<String>(attributeNames);
+ attributeNames_copy.remove(chosenAttribute);
+
+ if (filtered_facts.get(value).isEmpty()) {
+ /* no fact has this value: label the child with this node's majority value, rank 0 */
+ LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+ majorityNode.setRank(0.0);
+ currentNode.addNode(value, majorityNode);
+ } else {
+ TreeNode newNode = id3(dt, filtered_facts.get(value), attributeNames_copy);
+ currentNode.addNode(value, newNode);
+ }
+ }
+
+ return currentNode;
+ }
+
+ /** Returns the attribute whose split yields the largest information gain; falls back to the first attribute when no gain is positive. Prints one line per attribute. */
+ public String attributeWithGreatestGain(DecisionTree dt, List<Fact> facts, List<String> attrs) {
+     final double entropy = dt.getInformation(facts);
+     double bestGain = 0.0;
+     String best = attrs.get(0);
+
+     for (int i = 0; i < attrs.size(); i++) {
+         String candidate = attrs.get(i);
+         double gain = entropy - dt.getGain(facts, candidate);
+         System.out.println("Attribute: " + candidate + " the gain: " + gain);
+         if (gain > bestGain) {
+             bestGain = gain;
+             best = candidate;
+         }
+     }
+     return best;
+ }
+
+ public Hashtable<Object, List<Fact> > splitFacts(List<Fact> facts, String attributeName,
+ List<?> attributeValues) {
+ Hashtable<Object, List<Fact> > factLists = new Hashtable<Object, List<Fact> >(attributeValues.size());
+ for (Object v: attributeValues) {
+ factLists.put(v, new ArrayList<Fact>());
+ }
+ for (Fact f : facts) {
+ factLists.get(f.getFieldValue(attributeName)).add(f);
+ }
+ return factLists;
+ }
+
+ /** Debug aid: prints the entropy of facts and the attribute with the greatest gain. */
+ public void testEntropy(DecisionTree dt, List<Fact> facts) {
+     // entropy value
+     System.out.println("initial_information: " + dt.getInformation(facts));
+
+     // attributeWithGreatestGain prints one line per attribute before the summary line
+     String bestFirst = attributeWithGreatestGain(dt, facts, dt.getAttributes());
+     System.out.println("best attr: " + bestFirst);
+ }
+
+ public int getNumCall() { // FUNC_CALL is incremented once per id3 invocation
+ return FUNC_CALL;
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilderMT.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilderMT.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTreeBuilderMT.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,310 @@
+package id3;
+
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.List;
+
+public class DecisionTreeBuilderMT {
+
+ class MyThread extends Thread { // helper thread that runs one id3 sub-problem; its fields are filled in by id3 right before start()
+ DecisionTreeBuilderMT builder; // builder whose id3 is invoked
+ DecisionTree dt; // tree being learned
+ List<Fact> facts; // facts of the sub-problem
+ List<String> attributeNames; // attributes still available for splitting
+ TreeNode currentNode = null; // parent node that receives the result
+ Object value = null; // branch value under which the result is attached
+ TreeNode result = null; // subtree produced by run()
+ @Override
+ public void run() {
+ result = builder.id3(dt, facts, attributeNames);
+ currentNode.addNode(value, result);
+ }
+ }
+
+ MyThread helper;
+ private int FUNC_CALL = 0;
+ private int num_fact_processed = 0;
+
+ /*
+ * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+ */
+
+ public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass.getName());
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ for (FactSet fs: wm.getFactsets()) {
+ if (fs instanceof OOFactSet) {
+ if (klass.isAssignableFrom(((OOFactSet)fs).getFactClass())) {
+// **OPT facts.add(fs);
+ ((OOFactSet)fs).assignTo(facts); // adding all facts of fs to "facts"
+
+ if (klass == ((OOFactSet)fs).getFactClass()) {
+ klass_fs = fs;
+ }
+ }
+ } else if (klass.getName()== fs.getClassName()) {
+
+ }
+
+ }
+ dt.FACTS_READ += facts.size();
+
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ helper = new MyThread();
+// System.out.println("IS ALIVE"+helper.isAlive());
+ TreeNode root = id3(dt, facts, attrs);
+ try {
+ helper.join();
+ } catch (InterruptedException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+ public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass);
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ for (FactSet fs: wm.getFactsets()) {
+ if (klass == fs.getClassName()) {
+// **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts"
+
+ klass_fs = fs;
+ break;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ System.out.println("Bok degil "+ attr);
+ if (attr =="aratio") {
+ System.out.println("Bok");
+ System.exit(0);
+ }
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ helper = new MyThread();
+ //System.out.println("IS ALIVE"+helper.isAlive());
+ TreeNode root = id3(dt, facts, attrs);
+ try {
+ helper.join();
+ } catch (InterruptedException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+ /*
+ function ID3
+ Input: (R: a set of non-target attributes,
+ C: the target attribute,
+ S: a training set) returns a decision tree;
+ begin
+ If S is empty, return a single node with
+ value Failure;
+ If S consists of records all with the same
+ value for the target attribute,
+ return a single leaf node with that value;
+ If R is empty,
+ then return a single node with the value of the most frequent of the values of the target attribute
+ that are found in records of S; [in that case there may be be errors,
+ examples that will be improperly classified];
+ Let A be the attribute with largest
+ Gain(A,S) among attributes in R;
+ Let {aj| j=1,2, .., m} be the values of attribute A;
+ Let {Sj| j=1,2, .., m} be the subsets of S consisting respectively of records with value aj for A;
+ Return a tree with root labeled A and arcs labeled a1, a2, .., am going respectively
+ to the trees (ID3(R-{A}, C, S1), ID3(R-{A}, C, S2),.....,ID3(R-{A}, C, Sm);
+ Recursively apply ID3 to subsets {Sj| j=1,2, .., m} until they are empty
+ end
+
+
+ */
+ //*OPT* private TreeNode decisionTreeLearning(List<FactSet> facts,
+ //*OPT* List<String> attributeNames) {
+ //*OPT* private TreeNode decisionTreeLearning(List<FactSet> facts,
+ //*OPT* List<String> attributeNames) {
+ /**
+  * Recursive ID3 step: returns a leaf when all facts share one target value or no attribute is left,
+  * otherwise splits on the attribute with the greatest gain and handles every non-empty subset.
+  * <p>
+  * The first non-empty subset reached while the helper thread has never been started is handed to that thread;
+  * every other subset is processed recursively by the calling thread.
+  * <p>
+  * NOTE(review): FUNC_CALL and dt.FACTS_READ are updated by both threads without synchronization, so the
+  * counters may be off; whether TreeNode.addNode tolerates concurrent callers cannot be told from this file.
+  *
+  * @throws RuntimeException if facts is empty
+  */
+ private TreeNode id3(DecisionTree dt, List<Fact> facts, List<String> attributeNames) {
+     FUNC_CALL++;
+     if (facts.size() == 0) {
+         throw new RuntimeException("Nothing to classify, factlist is empty");
+     }
+
+     /* let's get the statistics of the results */
+     List<?> targetValues = dt.getPossibleValues(dt.getTarget());
+     Hashtable<Object, Integer> stats = dt.getStatistics(facts, dt.getTarget(), targetValues);
+
+     int winner_vote = 0;    // size of the largest class
+     int num_supporters = 0; // number of target values that occur at least once
+     Object winner = null;   // most frequent target value (first one wins ties)
+     for (Object key : targetValues) {
+         int num_in_class = stats.get(key).intValue();
+         if (num_in_class > 0) {
+             num_supporters++;
+         }
+         if (num_in_class > winner_vote) {
+             winner_vote = num_in_class;
+             winner = key;
+         }
+     }
+
+     /* if all elements are classified to the same value */
+     if (num_supporters == 1) {
+         LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+         classifiedNode.setRank((double) facts.size() / (double) num_fact_processed);
+         return classifiedNode;
+     }
+
+     /* if there is no attribute left in order to continue */
+     if (attributeNames.size() == 0) {
+         /* an heuristic of the leaf classification */
+         LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+         noAttributeLeftNode.setRank((double) winner_vote / (double) num_fact_processed);
+         return noAttributeLeftNode;
+     }
+
+     /* id3 starts */
+     String chosenAttribute = attributeWithGreatestGain(dt, facts, attributeNames);
+     System.out.println(Util.ntimes("*", 20)+" 1st best attr: "+ chosenAttribute);
+
+     TreeNode currentNode = new TreeNode(dt.getDomain(chosenAttribute));
+
+     List<?> attributeValues = dt.getPossibleValues(chosenAttribute);
+     Hashtable<Object, List<Fact> > filtered_facts = splitFacts(facts, chosenAttribute, attributeValues);
+     dt.FACTS_READ += facts.size();
+
+     for (int i = 0; i < attributeValues.size(); i++) {
+         Object value = attributeValues.get(i);
+
+         ArrayList<String> attributeNames_copy = new ArrayList<String>(attributeNames);
+         attributeNames_copy.remove(chosenAttribute);
+
+         if (filtered_facts.get(value).isEmpty()) {
+             /* no fact has this value: fall back to the majority class of this node */
+             LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+             majorityNode.setRank(0.0);
+             currentNode.addNode(value, majorityNode);
+         } else if (helper.getState() == Thread.State.NEW) {
+             // A Thread can be started only once. Testing !isAlive() would also match a helper
+             // that has already finished, and start() would then throw IllegalThreadStateException.
+             // Only a never-started helper is launched; afterwards the caller does the work itself.
+             helper.attributeNames = attributeNames_copy;
+             helper.builder = this;
+             helper.dt = dt;
+             helper.facts = filtered_facts.get(value);
+             helper.value = value;
+             helper.currentNode = currentNode;
+             helper.start();
+             System.out.println("helper thread launched");
+         } else {
+             TreeNode newNode = id3(dt, filtered_facts.get(value), attributeNames_copy);
+             currentNode.addNode(value, newNode);
+         }
+     }
+
+     return currentNode;
+ }
+
+ /** Returns the attribute whose split yields the largest information gain; falls back to the first attribute when no gain is positive. Prints one line per attribute. */
+ public String attributeWithGreatestGain(DecisionTree dt, List<Fact> facts, List<String> attrs) {
+     final double entropy = dt.getInformation(facts);
+     double bestGain = 0.0;
+     String best = attrs.get(0);
+
+     for (int i = 0; i < attrs.size(); i++) {
+         String candidate = attrs.get(i);
+         double gain = entropy - dt.getGain(facts, candidate);
+         System.out.println("Attribute: " + candidate + " the gain: " + gain);
+         if (gain > bestGain) {
+             bestGain = gain;
+             best = candidate;
+         }
+     }
+     return best;
+ }
+
+ public Hashtable<Object, List<Fact> > splitFacts(List<Fact> facts, String attributeName,
+ List<?> attributeValues) {
+ Hashtable<Object, List<Fact> > factLists = new Hashtable<Object, List<Fact> >(attributeValues.size());
+ for (Object v: attributeValues) {
+ factLists.put(v, new ArrayList<Fact>());
+ }
+ for (Fact f : facts) {
+ factLists.get(f.getFieldValue(attributeName)).add(f);
+ }
+ return factLists;
+ }
+
+ /** Debug aid: prints the entropy of facts and the attribute with the greatest gain. */
+ public void testEntropy(DecisionTree dt, List<Fact> facts) {
+     // entropy value
+     System.out.println("initial_information: " + dt.getInformation(facts));
+
+     // attributeWithGreatestGain prints one line per attribute before the summary line
+     String bestFirst = attributeWithGreatestGain(dt, facts, dt.getAttributes());
+     System.out.println("best attr: " + bestFirst);
+ }
+
+ public int getNumCall() { // FUNC_CALL is incremented once per id3 invocation (unsynchronized, by whichever thread runs id3)
+ return FUNC_CALL;
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Domain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Domain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Domain.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,65 @@
+package id3;
+
+import java.util.List;
+
+ public interface Domain<T> { // Describes one named attribute and the values of type T recorded for it.
+ // Getter and setter for the "constant" flag; in LiteralDomain and NumericDomain a set flag makes addValue() a no-op.
+ boolean isConstant();
+ void setConstant();
+ // True when the argument matches one of the recorded values (each implementation picks its own equality rule).
+ boolean contains(T value);
+ // The attribute name; FSFactSet and Fact use it as the lookup key for this domain.
+ String getName();
+ // Records a value; see the note on the constant flag above.
+ void addValue(T value);
+ // The values recorded so far.
+ List<T> getValues();
+ // Converts one text token into the value object; FSFactSet.insert() calls this for every column it reads.
+ Object readString(String data);
+ // LiteralDomain and NumericDomain return the domain name from toString().
+ String toString();
+ boolean isPossible(Object value) throws Exception; // Fact.add() rejects any value for which this returns false.
+ }
+
+
+
+/*
+workingmemory.insert(object)
+
+ factset f = factsets_hashtable[object.class]
+ if f == null
+ f = createnew_factset(object.class);
+ f.insert(object)
+
+
+factset workingmemory.createnew_factset(class)
+
+ factset newfs = new newfactset(class)
+ foreach field in class
+ domain d = domainset_hashtable[field]
+ if d == null
+ d = createnew_domain(field)
+ newfs.adddomain(d)
+
+
+factset.insert(object)
+
+ fact f;
+ foreach field in object
+ domain d = domainset_hashtable[field];
+ attribute attr = d.createattribute(field.value)
+ f.add(attr)
+ addfact(f)
+
+
+treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+
+
+*/
+
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DomainFactory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DomainFactory.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DomainFactory.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,61 @@
+package id3;
+
+ /**
+ * Static factory methods for the Domain implementations.
+ */
+ public class DomainFactory {
+ /** Creates a BooleanDomain with the given name. */
+ public static BooleanDomain createBooleanDomain(String name) {
+ return new BooleanDomain(name);
+ }
+
+ /** Creates a NumericDomain with the given name. */
+ public static NumericDomain createNumericDomain(String name) {
+ return new NumericDomain(name);
+ }
+
+ /** Creates a LiteralDomain with the given name. */
+ public static LiteralDomain createLiteralDomain(String name) {
+ return new LiteralDomain(name);
+ }
+
+ /**
+ * Chooses a domain implementation for a Java type.
+ *
+ * boolean/Boolean map to a boolean domain, String to a literal domain, and
+ * the numeric primitives plus every Number subtype to a numeric domain.
+ * The type checks test "is c this type", not "is c a supertype of it":
+ * the previous {@code c.isAssignableFrom(String.class)} was also true for
+ * Object, Comparable and CharSequence and turned them into string domains.
+ *
+ * @param c type of the field
+ * @param domainName name for the new domain
+ * @return the new domain, or {@code null} when the type is not supported
+ */
+ public static Domain<?> createDomainFromClass(Class<?> c, String domainName) {
+ if (c.isPrimitive()) {
+ if (c == boolean.class) {
+ System.out.println("Yuuuupiii boolean");
+ return createBooleanDomain(domainName);
+ }
+ if (c == int.class || c == long.class || c == short.class || c == byte.class ||
+ c == double.class || c == float.class) {
+ System.out.println("Yuuuupiii number");
+ return createNumericDomain(domainName);
+ }
+ // char and void are the remaining primitives.
+ return createComplexDomain(c,"kicimi ye simple: "+domainName);
+ }
+ if (String.class.equals(c)) {
+ System.out.println("Yuuuupiii string");
+ return createLiteralDomain(domainName);
+ }
+ if (Number.class.isAssignableFrom(c)) {
+ return createNumericDomain(domainName);
+ }
+ if (Boolean.class.equals(c)) {
+ return createBooleanDomain(domainName);
+ }
+ return createComplexDomain(c,domainName);
+ }
+
+ /**
+ * Placeholder for types without a domain implementation: logs the type and
+ * returns {@code null}; {@code domainName} is currently unused.
+ */
+ private static Domain<?> createComplexDomain(Class<?> c, String domainName) {
+ System.out.println("Bok ye this is complex type: "+ c);
+ return null;
+ }
+
+ // Unfinished sketch of a string-based variant, kept from the original commit:
+ // public static Domain<?> createDomainFromString(String data, String domainName) {
+ // if (c.isNumeric()) {
+ // System.out.println("Yuuuupiii string");
+ // return createNumericDomain(domainName);
+ // } else if (c.true/false ||
+ // c.isAssignableFrom(Double.class) ||
+ // c.isAssignableFrom(Float.class)) {
+ // return createNumericDomain(domainName);
+ // } else if (c.is literal )
+ // return createLiteral(domainName);
+ // else
+ // return createComplexDomain(c,domainName);
+ // }
+
+ }
\ No newline at end of file
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FSFactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FSFactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FSFactSet.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,133 @@
+package id3;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+
+ /**
+ * FactSet whose facts are parsed from separator-delimited text lines.
+ */
+ public class FSFactSet implements FactSet{
+
+ /* the facts in insertion order */
+ private List<Fact> facts;
+
+ /* domains registered for this set, keyed by domain name */
+ private Hashtable<String, Domain<?>> validDomains;
+
+ /* name identifying the kind of element stored here */
+ private String fs_class;
+
+ /** Creates an empty set with no registered domains. */
+ public FSFactSet(String element_class) {
+ this.fs_class = element_class;
+ this.facts = new ArrayList<Fact>();
+ this.validDomains = new Hashtable<String, Domain<?>>();
+ }
+
+ /** Creates an empty set and registers every given domain under its name. */
+ public FSFactSet(String element_class, List<Domain<?>> domains) {
+ this.fs_class = element_class;
+ this.facts = new ArrayList<Fact>();
+ this.validDomains = new Hashtable<String, Domain<?>>(domains.size());
+ for (Domain<?> domain : domains) {
+ this.validDomains.put(domain.getName(), domain);
+ }
+ }
+
+ /**
+ * Parses one text line into a fact and stores it.
+ *
+ * A single trailing "." is dropped, the rest is split with
+ * {@code String.split(separator)} (so the separator is a regular expression)
+ * and the i-th token is read by the i-th domain. A token the domain reads as
+ * {@code null} is replaced by the Double -1. A value the fact refuses is
+ * reported on standard output and left out; the fact is stored regardless.
+ *
+ * @param data the text line
+ * @param domains domains in the same order as the values in the line
+ * @param separator regular expression separating the values
+ * @return the result of adding the fact to the internal list
+ * @throws RuntimeException if the number of values differs from the number of domains
+ */
+ public boolean insert(String data, List<Domain<?>> domains, String separator){
+ Fact parsed = new Fact();
+ String line = data.endsWith(".") ? data.substring(0, data.length()-1) : data;
+ String[] tokens = line.split(separator);
+ if (domains.size() != tokens.length) {
+ throw new RuntimeException("Unable to construct Example from " + line);
+ }
+
+ int column = 0;
+ for (Domain<?> typedDomain : domains) {
+ // Raw on purpose: addValue(T) cannot be called through a wildcard type.
+ Domain domain = typedDomain;
+ Object value = domain.readString(tokens[column++]);
+ try {
+ if (value == null) {
+ value = Double.valueOf(-1.0);
+ } else if (domain.isPossible(value)) {
+ domain.addValue(value);
+ }
+ parsed.add(domain, value);
+ } catch (Exception e) {
+ System.out.println(e+ " the domain: "+domain.getName()+ " does not accept "+ value);
+ }
+ }
+ return facts.add(parsed);
+ }
+
+ /** Appends an already built fact. */
+ public void add(Fact newFact) {
+ facts.add(newFact);
+ }
+
+ /** Returns the fact at the given position of the internal list. */
+ public Fact getFact(int index) {
+ return facts.get(index);
+ }
+
+ /** Adds every stored fact to the given collection. */
+ public void assignTo(Collection<Fact> c) {
+ c.addAll(facts);
+ }
+
+ /** Number of stored facts. */
+ public int getSize() {
+ return facts.size();
+ }
+
+ /* TODO iterator */
+ /** The registered domains. */
+ public Collection<Domain<?>> getDomains() {
+ return validDomains.values();
+ }
+
+ /* TODO iterator */
+ /** The names the domains are registered under. */
+ public Collection<String> getDomainKeys() {
+ return validDomains.keySet();
+ }
+
+ /** The domain registered under the given name, or null if there is none. */
+ public Domain<?> getDomain(String field) {
+ return validDomains.get(field);
+ }
+
+ /** Registers (or replaces) the domain for a field name. */
+ public void addDomain(String field, Domain<?> fieldDomain) {
+ validDomains.put(field, fieldDomain);
+ }
+
+ /** The element class name given at construction. */
+ public String getClassName() {
+ return fs_class;
+ }
+
+ /** One line per fact, each terminated by a newline. */
+ public String toString() {
+ StringBuilder text = new StringBuilder();
+ for (Fact fact : facts) {
+ text.append(fact.toString()).append("\n");
+ }
+ return text.toString();
+ }
+
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Fact.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Fact.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Fact.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,75 @@
+package id3;
+import java.util.Hashtable;
+import java.util.Set;
+
+
+ /**
+ * One example: a set of named fields, each with the domain it belongs to and
+ * the value it holds. Both tables are keyed by the domain name.
+ */
+ public class Fact {
+
+ private Hashtable<String, Domain<?>> fields;
+ private Hashtable<String, Object> values;
+
+ /** Creates a fact without any field. */
+ public Fact() {
+ this.values = new Hashtable<String, Object>();
+ this.fields = new Hashtable<String, Domain<?>>();
+ /* while creating the fact i should add the possible keys, the valid domains */
+ }
+
+ /** Creates a fact that knows the given domains but holds no values yet. */
+ public Fact(Set<Domain<?>> domains) {
+ this.fields = new Hashtable<String, Domain<?>>();
+ for (Domain<?> d: domains)
+ this.fields.put(d.getName(), d);
+ this.values = new Hashtable<String, Object>();
+ /* while creating the fact i should add the possible keys, the valid domains */
+ }
+
+ /*
+ * TODO do i need to check anything before adding
+ * maybe i should check if the domain specifications are written somewhere
+ */
+ /**
+ * Stores a value for the field described by the given domain, replacing any
+ * earlier value for the same domain name.
+ *
+ * @param its_domain domain of the field
+ * @param value value to store
+ * @throws Exception if the domain reports the value as not possible
+ */
+ public void add(Domain<?> its_domain, Object value) throws Exception {
+ if (!its_domain.isPossible(value))
+ throw new Exception("The value "+value +" is not possible what is going on in domain: "+ its_domain.getName());
+ fields.put(its_domain.getName(), its_domain);
+ values.put(its_domain.getName(), value);
+ }
+
+ /** Value stored for the field, or null when the field has no value. */
+ public Object getFieldValue(String field_name) {
+ return values.get(field_name);
+ }
+
+ /** {@code toString()} of the field value, or null when the field has no value. */
+ public String getAttributeValueAsString(String name) {
+ Object attr = getFieldValue(name);
+ return (attr != null) ? attr.toString() : null;
+ }
+
+ /**
+ * Two facts are equal when they have the same domains and the same values.
+ * Comparing the domains alone, as before, made every pair of facts of the
+ * same fact set equal.
+ */
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if ((o == null) || (this.getClass() != o.getClass())) {
+ return false;
+ }
+ Fact other = (Fact) o;
+ return fields.equals(other.fields) && values.equals(other.values);
+ }
+
+ /** Combines both tables, consistent with {@link #equals(Object)}. */
+ public int hashCode() {
+ return 31 * fields.hashCode() + values.hashCode();
+ }
+
+ /** Renders every field as "domain=value," in the table's iteration order. */
+ public String toString() {
+ StringBuilder out = new StringBuilder();
+ for (String key: fields.keySet())
+ {
+ out.append(fields.get(key)).append("=").append(values.get(key)).append(",");
+ }
+ return out.toString();
+ }
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSet.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,19 @@
+package id3;
+
+import java.util.Collection;
+
+ public interface FactSet { // A collection of Fact objects together with the domains describing their fields.
+ // Name identifying the kind of element held (FSFactSet: the string given at construction; OOFactSet: the class name).
+ String getClassName();
+ // Adds every fact of this set to the given collection.
+ void assignTo(Collection<Fact> c);
+ // Looks up the domain registered under the given attribute/field name.
+ Domain<?> getDomain(String attr);
+ // All registered domains.
+ /* TODO iterator */
+ public Collection<Domain<?>> getDomains();
+ // Number of facts currently stored.
+ public int getSize();
+ // NOTE(review): of the two implementations in this commit only FSFactSet overrides toString() (one fact per line).
+ public String toString();
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSetFactory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSetFactory.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/FactSetFactory.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,196 @@
+package id3;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+ /**
+ * Loads the sample data sets (nursery, car, advertisement) from classpath
+ * resources into a WorkingMemory, one text line per fact.
+ */
+ public class FactSetFactory {
+
+ /**
+ * Loads the nursery data set with fixed (constant) literal domains.
+ * Failures are printed and otherwise ignored.
+ *
+ * @param simple working memory receiving the facts
+ * @return the class name the facts were inserted under
+ */
+ public static String insertNurserySet(WorkingMemory simple) {
+ /*
+ * not_recom, recommend, very_recom, priority, spec_prior
+
+ | attributes
+
+ parents: usual, pretentious, great_pret.
+ has_nurs: proper, less_proper, improper, critical, very_crit.
+ form: complete, completed, incomplete, foster.
+ children: 1, 2, 3, more.
+ housing: convenient, less_conv, critical.
+ finance: convenient, inconv.
+ social: nonprob, slightly_prob, problematic.
+ health: recommended, priority, not_recom.
+
+ */
+
+ String filename = "../data/nursery/nursery.data.txt";
+ String separator = ",";
+ String klass = "Nursey";
+ ArrayList<Domain<?>> domains = new ArrayList<Domain<?>>();
+ domains.add(new LiteralDomain("parents", new String[]{"usual", "pretentious", "great_pret"}));
+ domains.add(new LiteralDomain("has_nurs", new String[]{"proper", "less_proper", "improper", "critical", "very_crit"}));
+ domains.add(new LiteralDomain("form", new String[]{"complete", "completed", "incomplete", "foster"}));
+ domains.add(new LiteralDomain("children", new String[]{"1", "2", "3", "more"}));
+ domains.add(new LiteralDomain("housing", new String[]{"convenient", "less_conv", "critical"}));
+ domains.add(new LiteralDomain("finance", new String[]{"convenient", "inconv"}));
+ domains.add(new LiteralDomain("social", new String[]{"nonprob", "slightly_prob", "problematic"}));
+ domains.add(new LiteralDomain("health", new String[]{"recommended", "priority", "not_recom"}));
+ domains.add(new LiteralDomain("classnursery", new String[]{"not_recom", "recommend", "very_recom", "priority", "spec_prior"}));
+
+ for (Domain<?> d: domains) {
+ d.setConstant();
+ }
+
+ try {
+ FactSetFactory.fromFile(simple, filename, klass, domains , separator);
+ } catch (Exception e) {
+ // Best effort: the caller still gets the class name.
+ e.printStackTrace();
+ }
+
+ return klass;
+ }
+
+ /**
+ * Loads the car evaluation data set with fixed (constant) literal domains.
+ * Failures are printed and otherwise ignored.
+ *
+ * @param simple working memory receiving the facts
+ * @return the class name the facts were inserted under
+ */
+ public static String insertCarSet(WorkingMemory simple) {
+ /*
+ * | class values
+
+ unacc, acc, good, vgood
+
+ | attributes
+
+ buying: vhigh, high, med, low.
+ maint: vhigh, high, med, low.
+ doors: 2, 3, 4, 5, more.
+ persons: 2, 4, more.
+ lug_boot: small, med, big.
+ safety: low, med, high.
+
+ */
+
+ String filename = "../data/car/car.data.txt";
+ String separator = ",";
+ String klass = "Car";
+ ArrayList<Domain<?>> domains = new ArrayList<Domain<?>>();
+ domains.add(new LiteralDomain("buying", new String[]{"vhigh", "high", "med", "low"}));
+ domains.add(new LiteralDomain("maint", new String[]{"vhigh", "high", "med", "low"}));
+ domains.add(new LiteralDomain("doors", new String[]{"2", "3", "4", "5more"}));
+ domains.add(new LiteralDomain("persons", new String[]{"2", "4", "more"}));
+ domains.add(new LiteralDomain("lug_boot", new String[]{"small", "med", "big"}));
+ domains.add(new LiteralDomain("safety", new String[]{"low", "med", "high"}));
+ domains.add(new LiteralDomain("classCar", new String[]{"unacc", "acc", "good", "vgood"}));
+
+ for (Domain<?> d: domains) {
+ d.setConstant();
+ }
+
+ try {
+ FactSetFactory.fromFile(simple, filename, klass, domains , separator);
+ } catch (Exception e) {
+ // Best effort: the caller still gets the class name.
+ e.printStackTrace();
+ }
+
+ return klass;
+ }
+
+ /**
+ * Loads the advertisement data set; its domains are read from a separate
+ * domain description resource. Failures are printed and otherwise ignored.
+ *
+ * @param simple working memory receiving the facts
+ * @return the class name the facts were inserted under
+ */
+ public static String insertAdvertisementSet(WorkingMemory simple) {
+
+ String filename = "../data/advertisement/ad.data.txt";
+ String separator = ",";
+ String klass = "Advertisement";
+
+ String domainFileName = "../data/advertisement/data_domains.txt";
+ String separatorDomain = ":";
+ ArrayList<Domain<?>> domains;
+ try {
+ domains = FactSetFactory.fromFileDomain(domainFileName, separatorDomain);
+
+ FactSetFactory.fromFile(simple, filename, klass, domains , separator);
+ } catch (Exception e1) {
+ // Best effort: the caller still gets the class name.
+ e1.printStackTrace();
+ }
+ return klass;
+
+ }
+
+ /** Attribute names collected by {@link #fromFileDomain}; every call appends to it. */
+ public static ArrayList<String> attributesOfAdvertisement = new ArrayList<String>();
+
+ /**
+ * Builds the domain list of the advertisement data set: three continuous
+ * numeric domains (height, width, aratio), one boolean domain per line of the
+ * domain resource that does not start with "|", and the literal class domain.
+ *
+ * @param domainFileName classpath resource, resolved relative to this class
+ * @param separator regular expression separating the attribute name from the rest of the line
+ * @return the domains in column order
+ * @throws Exception if the resource is missing or cannot be read
+ */
+ public static ArrayList<Domain<?>> fromFileDomain(String domainFileName, String separator)
+ throws Exception {
+
+ ArrayList<Domain<?>> domains = new ArrayList<Domain<?>>();
+ // Each numeric column is continuous; previously only height was marked
+ // (three times) and width/aratio stayed discrete.
+ NumericDomain height = new NumericDomain("height");
+ height.setContinuous();
+
+ NumericDomain width = new NumericDomain("width");
+ width.setContinuous();
+
+ NumericDomain aratio = new NumericDomain("aratio");
+ aratio.setContinuous();
+ domains.add(height);
+ domains.add(width);
+ domains.add(aratio);
+
+ BufferedReader reader = openResource(domainFileName);
+ try {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ if (!line.startsWith("|")) {
+ List<String> attributeValues = Arrays.asList(line.split(separator, 2));
+ attributesOfAdvertisement.add(attributeValues.get(0));
+ domains.add(new BooleanDomain(attributeValues.get(0)));
+ }
+ }
+ } finally {
+ reader.close();
+ }
+
+ domains.add(new LiteralDomain("classAdvertisement", new String[]{"ad", "nonad"}));
+ attributesOfAdvertisement.add("classAdvertisement");
+ System.out.println("# of domains:"+ domains.size());
+
+ return domains;
+
+ }
+
+ /**
+ * Reads a data resource line by line and inserts every trimmed line into the
+ * working memory. Reading stops at the first empty line.
+ *
+ * @param wm working memory receiving the lines
+ * @param filename classpath resource, resolved relative to this class
+ * @param klass class name the facts are inserted under
+ * @param domains domains in column order
+ * @param separator separator between the values of a line
+ * @throws Exception if the resource is missing or cannot be read, or if insertion fails
+ */
+ public static void fromFile(WorkingMemory wm, String filename, String klass,List<Domain<?>> domains,String separator)
+ throws Exception {
+ BufferedReader reader = openResource(filename);
+ try {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ line = line.trim();
+ if (line.length()==0)
+ break;
+ wm.insert(line,klass, separator,domains);
+ }
+ } finally {
+ reader.close();
+ }
+ }
+
+ /** Opens a classpath resource relative to this class, failing with a clear message when it is absent. */
+ private static BufferedReader openResource(String resourceName) throws java.io.FileNotFoundException {
+ java.io.InputStream in = FactSetFactory.class.getResourceAsStream(resourceName);
+ if (in == null) {
+ throw new java.io.FileNotFoundException("Resource not found on classpath: " + resourceName);
+ }
+ return new BufferedReader(new InputStreamReader(in));
+ }
+
+ /** Not implemented yet: ignores both arguments and returns an empty fact. */
+ public static Fact fromObject(Object data, List<Domain<?>> domains) {
+ Fact newfact = new Fact();
+ return newfact;
+ }
+
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LeafNode.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LeafNode.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LeafNode.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,43 @@
+package id3;
+
+
+public class LeafNode extends TreeNode {
+ //represents leaf nodes with the target value
+ private Object targetValue;
+ private double rank;
+
+ public LeafNode(Domain<?> targetDomain, Object value){
+ super(targetDomain);
+ this.targetValue = value;
+ }
+
+ public void addNode(Object attributeValue, TreeNode node) {
+ throw new RuntimeException("cannot add Node to a leaf node");
+ }
+
+ public void addLeaf(Object attributeValue, String target, Boolean targetValue) {
+ throw new RuntimeException("cannot add Leaf to a final node");
+ }
+
+ public Object getValue() {
+ return targetValue;
+ }
+
+ public double getRank() {
+ return rank;
+ }
+
+ public void setRank(double rank) {
+ this.rank = rank;
+ }
+
+ public String toString(){
+ return "DECISION -> " + targetValue.toString();
+ }
+
+ public String toString(int depth, StringBuffer buf) {
+ buf.append(Util.ntimes("\t",depth+1));
+ buf.append("DECISION -> " +targetValue.toString()+"\n");
+ return buf.toString();
+ }
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LiteralDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LiteralDomain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LiteralDomain.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,94 @@
+package id3;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+ /**
+ * Domain of string values. New values are collected through
+ * {@link #addValue(String)} until the domain is marked constant.
+ */
+ public class LiteralDomain implements Domain<String> {
+
+ private String fName;
+ private List<String> fValues;
+ private boolean constant;
+
+ /** Creates a domain without values; the name is trimmed. */
+ public LiteralDomain(String _name) {
+ fName = _name.trim();
+ fValues = new ArrayList<String>();
+ }
+
+ /**
+ * Creates a domain with initial values; the name is stored as given.
+ * The values are copied into a growable list: the fixed-size view returned
+ * by {@code Arrays.asList} made every later {@code addValue} call fail with
+ * UnsupportedOperationException.
+ */
+ public LiteralDomain(String _name, String[] possibleValues) {
+ fName = _name;
+ fValues = new ArrayList<String>(Arrays.asList(possibleValues));
+ }
+
+ public String getName() {
+ return fName;
+ }
+
+ /** Adds the value unless the domain is constant or already holds it. */
+ public void addValue(String value) {
+ if (constant)
+ return;
+ if (!fValues.contains(value))
+ fValues.add(value);
+ }
+
+ /**
+ * Case-insensitive membership test; {@code null} is never contained.
+ * NOTE(review): isPossible() compares case-sensitively - confirm which rule is intended.
+ */
+ public boolean contains(String value) {
+ for(String n: fValues) {
+ if (n != null && n.equalsIgnoreCase(value))
+ return true;
+ }
+ return false;
+ }
+
+ /** The internal value list (not a copy). */
+ public List<String> getValues() {
+ return fValues;
+ }
+
+ /** Hash of the domain name. */
+ public int hashCode() {
+ return fName.hashCode();
+ }
+
+ public boolean isConstant() {
+ return this.constant;
+ }
+
+ public void setConstant() {
+ this.constant = true;
+ }
+
+ /** Returns the trimmed text. */
+ public Object readString(String data) {
+ return data.trim();
+ }
+
+ /**
+ * A value is possible when it is a String and, for a constant domain,
+ * already part of the value list (exact match).
+ */
+ public boolean isPossible(Object value) {
+ if (!(value instanceof String))
+ return false;
+ if (constant && !fValues.contains(value))
+ return false;
+ return true;
+ }
+
+ /** The domain name. */
+ public String toString() {
+ return fName;
+ }
+
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/NumericDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/NumericDomain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/NumericDomain.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,153 @@
+package id3;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class NumericDomain implements Domain<Number> {
+
+ private String fName;
+ private ArrayList<Number> fValues;
+ private boolean constant;
+ private boolean discrete;
+
+
+ public NumericDomain(String _name) {
+ fName = _name.trim();
+ fValues = new ArrayList<Number>();
+ discrete = true;
+ }
+ public void setContinuous() {
+ discrete = false;
+ }
+
+ public boolean isDiscrete() {
+ return discrete;
+ }
+
+ public String getName() {
+ return fName;
+ }
+
+ public void addValue(Number value) {
+ if (constant)
+ return;
+ if (discrete) {
+ if (!fValues.contains(value))
+ fValues.add(value);
+ } else {
+ if (fValues.isEmpty()) {
+ fValues.add(value);
+ return;
+ } else if (fValues.size()==1) {
+ if (value.doubleValue() < fValues.get(0).doubleValue()) {
+ Number first = fValues.remove(0);
+ fValues.add(value);
+ fValues.add(first);
+ } else if (value.doubleValue() > fValues.get(0).doubleValue()) {
+ fValues.add(value);
+ }
+ return;
+ } else {
+ if (value.doubleValue() > fValues.get(1).doubleValue()) {
+ fValues.remove(1);
+ fValues.add(1, value);
+ return;
+ }
+ if (value.doubleValue() < fValues.get(0).doubleValue()) {
+ fValues.remove(0);
+ fValues.add(0, value);
+ return;
+ }
+ }
+ }
+
+ }
+
+ public boolean contains(Number value) {
+ for(Number n: fValues) {
+ if (value.intValue() == n.intValue() ||
+ value.doubleValue() == n.doubleValue() ||
+ value.floatValue() == n.floatValue())
+ return true;
+ }
+ return false;
+ }
+
+ public List<Number> getValues() {
+ return fValues;
+ }
+
+ public int hashCode() {
+ return fName.hashCode();
+ }
+
+ public boolean isConstant() {
+ return this.constant;
+ }
+
+ public void setConstant() {
+ this.constant = true;
+ }
+
+ public Object readString(String data) {
+ if (isValid(data))
+ return Double.parseDouble(data);
+ else
+ return null;
+ }
+
+ public boolean isValid(String string) {
+ if (string == null)
+ return true;
+ try{
+ Double.parseDouble(string);
+ return true;
+ }
+ catch (Exception e){
+ return false;
+ }
+ }
+
+ public boolean isPossible(Object value) throws Exception {
+ //System.out.println("NumericDomain.isPossible() start "+ value+ " ?");
+
+ if (!(value instanceof Number))
+ return false;
+ //System.exit(0);
+ if (constant) {
+ //System.out.println("NumericDomain.isPossible() constant "+ value+ " ?");
+ //System.exit(0);
+
+ if (discrete) {
+ if (fValues.contains(value))
+ return true;
+
+ //System.out.println("NumericDomain.isPossible() constant && discrete "+ value+ " ?");
+ //System.exit(0);
+ } else {
+ if (fValues.isEmpty() || fValues.size()==1)
+ throw new Exception("Numerical domain "+fName+" is constant and not discrete but bounds are not set: possible values size: "+ fValues.size());
+ if (((Number)value).doubleValue() >= fValues.get(0).doubleValue() &&
+ ((Number)value).doubleValue() <= fValues.get(1).doubleValue()) {
+ return true;
+ }
+ //System.out.println("NumericDomain.isPossible() "+ value+ " ?");
+ }
+ } else {
+ return true;
+ }
+
+ //System.out.println("NumericDomain.isPossible() end "+ value+ " ?");
+ //System.exit(0);
+
+ return false;
+ }
+
+ public String toString() {
+ String out = fName;
+ return out;
+ }
+
+
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/OOFactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/OOFactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/OOFactSet.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,133 @@
+package id3;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Hashtable;
+import java.util.List;
+
+ /**
+ * FactSet whose facts are built from Java objects by calling their getters
+ * through reflection.
+ */
+ public class OOFactSet implements FactSet{
+
+ private List<Fact> facts;
+
+ /* domains registered for this set, keyed by field name */
+ private Hashtable<String, Domain<?>> validDomains;
+
+ private Class<?> fs_class;
+
+ /** Creates an empty set for objects of the given class. */
+ public OOFactSet(Class<?> fact_class) {
+ this.facts = new ArrayList<Fact>();
+ this.validDomains = new Hashtable<String, Domain<?>>();
+ this.fs_class = fact_class;
+ }
+
+ /**
+ * Builds a fact from the element's declared getters and stores it.
+ *
+ * Only no-argument methods that {@code Util.isGetter} accepts and whose
+ * return type {@code Util.isSimpleType} accepts are read. A field without a
+ * registered domain is reported and skipped. A getter that fails, or a value
+ * the fact refuses, has its stack trace printed and is skipped. The fact is
+ * stored in every case.
+ *
+ * @param element object to read
+ * @return the result of adding the fact to the internal list
+ */
+ public boolean insert(Object element) {
+ Fact f = new Fact();
+
+ for (Method m: element.getClass().getDeclaredMethods()) {
+ String m_name = m.getName();
+ String return_type_name = m.getReturnType().getName();
+ if (!Util.isGetter(m_name) || !Util.isSimpleType(return_type_name)) {
+ continue; // in the future we should support classes
+ }
+ if (m.getParameterTypes().length != 0) {
+ continue; // named like a getter but cannot be invoked without arguments
+ }
+ String field = Util.getAttributeName(m_name);
+
+ // Raw on purpose: addValue(T) cannot be called through a wildcard type.
+ Domain fieldDomain = validDomains.get(field);
+ if (fieldDomain == null) {
+ System.out.println("No domain registered for field " + field + " of " + fs_class + ", skipping it");
+ continue;
+ }
+
+ try {
+ Object field_value = m.invoke(element);
+ if (fieldDomain.isPossible(field_value))
+ fieldDomain.addValue(field_value);
+ f.add(fieldDomain, field_value);
+ } catch (Exception e) {
+ // Reflection failures and rejected values: skip this field only.
+ e.printStackTrace();
+ }
+ }
+
+ return facts.add(f);
+ }
+
+ /** Returns the fact at the given position of the internal list. */
+ public Fact getFact(int index) {
+ return facts.get(index);
+ }
+
+ /** Adds every stored fact to the given collection. */
+ public void assignTo(Collection<Fact> c) {
+ c.addAll(facts);
+ }
+
+ /** Number of stored facts. */
+ public int getSize() {
+ return facts.size();
+ }
+
+ /* TODO iterator */
+ /** The registered domains. */
+ public Collection<Domain<?>> getDomains() {
+ return validDomains.values();
+ }
+
+ /* TODO iterator */
+ /** The field names the domains are registered under. */
+ public Collection<String> getDomainKeys() {
+ return validDomains.keySet();
+ }
+
+ /** The domain registered for the field, or null if there is none. */
+ public Domain<?> getDomain(String field) {
+ return validDomains.get(field);
+ }
+
+ /** Registers (or replaces) the domain for a field name. */
+ public void addDomain(String field, Domain<?> fieldDomain) {
+ validDomains.put(field, fieldDomain);
+ }
+
+ /** The class given at construction. */
+ public Class<?> getFactClass() {
+ return fs_class;
+ }
+
+ /** Fully qualified name of the class given at construction. */
+ public String getClassName() {
+ return fs_class.getName();
+ }
+
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Restaurant.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Restaurant.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Restaurant.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,147 @@
+package id3;
+
+
+ /**
+ * Plain data holder for one example of the restaurant waiting problem.
+ * The boolean attributes are yes/no answers; the comments next to the other
+ * fields list the value sets noted by the original author.
+ */
+ public class Restaurant {
+
+ private boolean alternate;
+ private boolean bar;
+ private boolean fri_sat;
+ private boolean hungry;
+ private String patrons; // "None", "Some", "Full"
+ private int price; // originally noted as "$", "$$", "$$$"
+ private boolean raining;
+ private boolean reservation;
+ private String type; // "French", "Italian", "Thai", "Burger"
+ private String wait_estimate; // "0-10", "10-30", "30-60", ">60"
+ private boolean will_wait;
+
+ /** Sets every attribute; the arguments follow the field order above. */
+ public Restaurant (boolean alt, boolean b, boolean f_s, boolean hung, String pat, int pri,
+ boolean rain, boolean reserv, String t, String wait, boolean will) {
+ this.alternate = alt;
+ this.bar = b;
+ this.fri_sat = f_s;
+ this.hungry = hung;
+ this.patrons = pat;
+ this.price = pri;
+ this.raining = rain;
+ this.reservation = reserv;
+ this.type = t;
+ this.wait_estimate = wait;
+ this.will_wait = will;
+ }
+
+ public boolean getAlternate() {
+ return this.alternate;
+ }
+
+ public void setAlternate(boolean alternate) {
+ this.alternate = alternate;
+ }
+
+ public boolean getBar() {
+ return this.bar;
+ }
+
+ public void setBar(boolean bar) {
+ this.bar = bar;
+ }
+
+ public boolean getFri_sat() {
+ return this.fri_sat;
+ }
+
+ public void setFri_sat(boolean fri_sat) {
+ this.fri_sat = fri_sat;
+ }
+
+ public boolean getHungry() {
+ return this.hungry;
+ }
+
+ public void setHungry(boolean hungry) {
+ this.hungry = hungry;
+ }
+
+ public String getPatrons() {
+ return this.patrons;
+ }
+
+ public void setPatrons(String patrons) {
+ this.patrons = patrons;
+ }
+
+ public int getPrice() {
+ return this.price;
+ }
+
+ public void setPrice(int price) {
+ this.price = price;
+ }
+
+ public boolean getRaining() {
+ return this.raining;
+ }
+
+ public void setRaining(boolean raining) {
+ this.raining = raining;
+ }
+
+ public boolean getReservation() {
+ return this.reservation;
+ }
+
+ public void setReservation(boolean reservation) {
+ this.reservation = reservation;
+ }
+
+ public String getType() {
+ return this.type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public String getWait_estimate() {
+ return this.wait_estimate;
+ }
+
+ public void setWait_estimate(String wait_estimate) {
+ this.wait_estimate = wait_estimate;
+ }
+
+ public boolean getWill_wait() {
+ return this.will_wait;
+ }
+
+ public void setWill_wait(boolean will_wait) {
+ this.will_wait = will_wait;
+ }
+
+ }
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/RulePrinter.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/RulePrinter.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/RulePrinter.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,252 @@
+package id3;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Stack;
+
+/**
+ * Walks a {@link DecisionTree} depth-first and turns every root-to-leaf path
+ * into a {@link Rule}: the inner nodes on the path become the rule's
+ * conditions and the leaf becomes its action. The collected rules are sorted
+ * by rank and written to standard output.
+ *
+ * <p>An instance may be reused: every call to {@link #printer(DecisionTree)}
+ * starts from a clean state.
+ */
+public class RulePrinter {
+
+    /** Separator placed between two conditions of the same rule. */
+    private static final String AND = " & ";
+
+    /** Plain-text rendering of every path, built by {@link #print(Stack)}. */
+    private final ArrayList<String> ruleText;
+
+    /** One rule per leaf reached during the traversal. */
+    private final ArrayList<Rule> rules;
+
+    /** The path from the root to the node currently being visited. */
+    private final Stack<NodeValue> nodes;
+
+    /** Name of the tree being printed; used as the prefix of the matched object. */
+    private Object ruleObject;
+
+    public RulePrinter() {
+        ruleText = new ArrayList<String>();
+        rules = new ArrayList<Rule>();
+        nodes = new Stack<NodeValue>();
+    }
+
+    /**
+     * Extracts the rules of {@code dt}, sorts them by ascending rank and prints
+     * each one to {@code System.out}.
+     *
+     * @param dt the tree whose paths are converted into rules
+     */
+    public void printer(DecisionTree dt) {
+        // Forget whatever a previous run left behind; otherwise a second call
+        // would print the old rules again and start from a polluted path.
+        ruleText.clear();
+        rules.clear();
+        nodes.clear();
+
+        ruleObject = dt.getName();
+        dfs(dt.getRoot());
+
+        Collections.sort(rules, Rule.getRankComparator());
+        int i = 0;
+        for (Rule rule : rules) {
+            i++;
+            System.out.println("//rule " +i + " write to drl \n"+ rule +"\n");
+        }
+    }
+
+    /**
+     * Visits {@code my_node} and its subtree. The node is pushed on the path
+     * for the duration of the visit and always popped again, so the path is
+     * balanced when the traversal returns (including for the root).
+     */
+    private void dfs(TreeNode my_node) {
+        NodeValue node_value = new NodeValue(my_node);
+        nodes.push(node_value);
+        try {
+            if (my_node instanceof LeafNode) {
+                // A leaf closes the path: its value is the decision of the rule.
+                node_value.setNodeValue(((LeafNode) my_node).getValue());
+                ruleText.add(print(nodes));
+                rules.add(spitRule(nodes));
+                return;
+            }
+
+            for (java.util.Map.Entry<Object, TreeNode> branch : my_node.getChildren().entrySet()) {
+                // The same NodeValue is re-labelled for every outgoing branch;
+                // Rule copies it, so earlier rules are not affected.
+                node_value.setNodeValue(branch.getKey());
+                dfs(branch.getValue());
+            }
+        } finally {
+            nodes.pop();
+        }
+    }
+
+    /**
+     * Builds a rule from the current path: every element but the last is a
+     * condition, the last element is the action.
+     */
+    private Rule spitRule(Stack<NodeValue> nodes) {
+        Rule newRule = new Rule(nodes.size());
+        int last = nodes.size() - 1;
+        for (int i = 0; i < last; i++) {
+            newRule.addCondition(nodes.get(i));
+        }
+        if (last >= 0) {
+            newRule.addAction(nodes.get(last));
+        }
+        return newRule;
+    }
+
+    /**
+     * Renders the current path in the shape of a rule such as
+     * <pre>
+     * rule "Good Bye"
+     *     when
+     *         Message( status == Message.GOODBYE, message : message )
+     *     then
+     *         System.out.println( "Goodbye: " + message );
+     * </pre>
+     * Conditions are joined with {@code " & "}; a path consisting of a single
+     * leaf yields an empty condition list instead of a truncated header.
+     */
+    private String print(Stack<NodeValue> nodes) {
+        StringBuilder out = new StringBuilder("rule \"1 rank:\" \n");
+        out.append("\t when");
+        out.append("\n\t\t ").append(ruleObject).append("Object(");
+
+        int last = nodes.size() - 1;
+        for (int i = 0; i < last; i++) {
+            if (i > 0) {
+                out.append(AND);
+            }
+            NodeValue condition = nodes.get(i);
+            out.append(condition.getDomain()).append(" == ").append(condition.getNodeValue());
+        }
+        out.append(")\n");
+
+        if (last >= 0) {
+            NodeValue decision = nodes.get(last);
+            out.append("\n\t then ");
+            // The whole message is one string literal so that the quotes of
+            // the generated statement are balanced.
+            out.append("\n\t\t System.out.println(\"Decision (")
+               .append(decision.getDomain())
+               .append(") = ")
+               .append(decision.getNodeValue())
+               .append("\");");
+        }
+        return out.toString();
+    }
+
+}
+
+
+
+
+/**
+ * A single rule extracted from a decision tree: a list of conditions (the
+ * inner nodes of a path), a list of actions (the leaf of the path) and the
+ * rank taken from that leaf.
+ */
+class Rule {
+
+    /** Separator placed between two conditions or two actions. */
+    private static final String AND = " & ";
+
+    private double rank;
+    private ArrayList<NodeValue> conditions;
+    private ArrayList<NodeValue> actions;
+
+    /**
+     * @param numCond expected number of conditions; only used to size the list
+     */
+    Rule(int numCond) {
+        conditions = new ArrayList<NodeValue>(numCond);
+        actions = new ArrayList<NodeValue>(1);
+    }
+
+    /** @return the rank of the most recently added action */
+    public double getRank() {
+        return rank;
+    }
+
+    /** Stores a snapshot of {@code current} as a condition. */
+    public void addCondition(NodeValue current) {
+        // Copy: the caller keeps mutating its NodeValue while it walks the tree.
+        conditions.add(new NodeValue(current.getNode(), current.getNodeValue()));
+    }
+
+    /**
+     * Stores a snapshot of {@code current} as an action and adopts the rank of
+     * its node.
+     *
+     * @throws ClassCastException if the node of {@code current} is not a {@link LeafNode}
+     */
+    public void addAction(NodeValue current) {
+        actions.add(new NodeValue(current.getNode(), current.getNodeValue()));
+        rank = ((LeafNode)current.getNode()).getRank();
+    }
+
+    /**
+     * Renders the rule in the shape of
+     * <pre>
+     * rule "Good Bye"
+     *     when
+     *         Message( status == Message.GOODBYE, message : message )
+     *     then
+     *         System.out.println( "Goodbye: " + message );
+     * </pre>
+     * Conditions and actions are joined with {@code " & "}. A rule without
+     * conditions renders as {@code Object()} and a rule without actions prints
+     * an empty decision, instead of producing a mangled header or throwing.
+     */
+    @Override
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+        out.append("rule \"#x rank:").append(rank).append("\" \n");
+        out.append("\t when");
+        out.append("\n\t\t Object(");
+        for (int i = 0; i < conditions.size(); i++) {
+            if (i > 0) {
+                out.append(AND);
+            }
+            out.append(conditions.get(i));
+        }
+        out.append(")\n");
+
+        StringBuilder action = new StringBuilder();
+        for (int i = 0; i < actions.size(); i++) {
+            if (i > 0) {
+                action.append(AND);
+            }
+            action.append(actions.get(i).getNodeValue());
+        }
+        if (actions.isEmpty()) {
+            // Keep the generated statement well-formed when nothing was decided.
+            action.append("\"\"");
+        }
+
+        out.append("\n\t then ");
+        out.append("\n\t\t System.out.println(\"Decision (\"+").append(action).append("+\")\");");
+
+        return out.toString();
+    }
+
+    /** @return a comparator ordering rules by ascending rank */
+    public static Comparator<Rule> getRankComparator() {
+        return new RuleComparator();
+    }
+
+    private static class RuleComparator implements Comparator<Rule>{
+        public int compare(Rule r1, Rule r2) {
+            // Double.compare gives a total order (NaN included), which the
+            // sort contract requires.
+            return Double.compare(r1.getRank(), r2.getRank());
+        }
+    }
+}
+
+
+class NodeValue {
+
+ private TreeNode node;
+ private Object nodeValue;
+
+
+ NodeValue(TreeNode n) {
+ this.node = n;
+ }
+
+ NodeValue(TreeNode n, Object value) {
+ this.node = n;
+ this.nodeValue = value;
+ }
+ public String getDomain() {
+ return node.getDomain().getName();
+ }
+
+ public TreeNode getNode() {
+ return node;
+ }
+ public void setNode(TreeNode node) {
+ this.node = node;
+ }
+ public Object getNodeValue() {
+ return nodeValue;
+ }
+ public void setNodeValue(Object nodeValue) {
+ this.nodeValue = nodeValue;
+ }
+ public String toString() {
+ return node.getDomain() + " == "+ nodeValue;
+ }
+
+}
+
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/TreeNode.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/TreeNode.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/TreeNode.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,58 @@
+package id3;
+import java.util.Hashtable;
+
+
+/**
+ * A node of the decision tree: the domain it tests plus one child per
+ * attribute value of that domain.
+ */
+public class TreeNode {
+
+    private Domain<?> domain;
+    private Hashtable<Object, TreeNode> children;
+
+    /** Creates a node for {@code domain} with no children. */
+    public TreeNode(Domain<?> domain) {
+        this.children = new Hashtable<Object, TreeNode>();
+        this.domain = domain;
+    }
+
+    /** Attaches {@code node} as the child reached through {@code attributeValue}. */
+    public void addNode(Object attributeValue, TreeNode node) {
+        this.children.put(attributeValue, node);
+    }
+
+    public Domain<?> getDomain() {
+        return this.domain;
+    }
+
+    public void setDomain(Domain<?> domain) {
+        this.domain = domain;
+    }
+
+    public Hashtable<Object, TreeNode> getChildren() {
+        return this.children;
+    }
+
+    public void setChildren(Hashtable<Object, TreeNode> children) {
+        this.children = children;
+    }
+
+    /** Renders the subtree rooted here, starting at indentation depth 1. */
+    public String toString() {
+        StringBuffer fresh = new StringBuffer();
+        return toString(1, fresh);
+    }
+
+    /**
+     * Appends an indented rendering of this subtree to {@code buf} and returns
+     * the buffer's content. Nothing is appended when the node has no domain.
+     *
+     * @param depth number of tabs placed in front of this node's line
+     * @param buf   buffer receiving the text
+     */
+    public String toString(int depth, StringBuffer buf) {
+        if (domain == null) {
+            return buf.toString();
+        }
+
+        // Header line of this node.
+        buf.append(Util.ntimes("\t", depth)).append("***").append(domain.getName()).append(" \n");
+
+        // One "+value" line per branch, followed by the child's own rendering.
+        String branchIndent = Util.ntimes("\t", depth + 1);
+        for (java.util.Map.Entry<Object, TreeNode> branch : children.entrySet()) {
+            buf.append(branchIndent).append("+").append(branch.getKey()).append("\n");
+            buf.append(branch.getValue().toString(depth + 1, new StringBuffer()));
+        }
+        return buf.toString();
+    }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Util.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Util.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/Util.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,43 @@
+package id3;
+
+/**
+ * Small static helpers: string repetition, getter-name handling for
+ * reflection-based attribute discovery, and a base-2 logarithm.
+ */
+public class Util {
+
+    /**
+     * Concatenates {@code n} copies of {@code s}.
+     *
+     * @return the repeated string; empty when {@code n} is zero or negative
+     */
+    public static String ntimes(String s,int n){
+        StringBuilder buf = new StringBuilder();
+        for (int i = 0; i < n; i++) {
+            buf.append(s);
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Tells whether {@code type_name} names one of the types treated as simple:
+     * {@code boolean}, {@code int}, {@code double}, {@code float} or
+     * {@code java.lang.String} (compared ignoring case).
+     */
+    public static boolean isSimpleType(String type_name) {
+        return type_name.equalsIgnoreCase("boolean")
+                || type_name.equalsIgnoreCase("int")
+                || type_name.equalsIgnoreCase("double")
+                || type_name.equalsIgnoreCase("float")
+                || type_name.equalsIgnoreCase("java.lang.String");
+    }
+
+    /** Tells whether {@code method_name} starts with {@code get} or {@code is}. */
+    public static boolean isGetter(String method_name) {
+        return method_name.startsWith("get") || method_name.startsWith("is");
+    }
+
+    /**
+     * Derives the attribute name from a getter name by dropping the
+     * {@code get}/{@code is} prefix and lower-casing the rest.
+     *
+     * <p>Lower-casing uses a fixed locale: with the default locale the result
+     * depends on the machine (under a Turkish locale {@code "Info"} becomes
+     * {@code "ınfo"}), which would make attribute names differ between hosts.
+     *
+     * @return the attribute name, or {@code null} when {@code method_name} is
+     *         not a getter name
+     */
+    public static String getAttributeName(String method_name) {
+        if (method_name.startsWith("get")) {
+            return method_name.substring(3).toLowerCase(java.util.Locale.ENGLISH);
+        }
+        if (method_name.startsWith("is")) {
+            return method_name.substring(2).toLowerCase(java.util.Locale.ENGLISH);
+        }
+        return null;
+    }
+
+    /** @return the logarithm of {@code prob} to base 2 */
+    public static double log2(double prob) {
+        return Math.log(prob) / Math.log(2);
+    }
+
+}
\ No newline at end of file
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/WorkingMemory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/WorkingMemory.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/WorkingMemory.java 2008-03-17 23:56:55 UTC (rev 19045)
@@ -0,0 +1,196 @@
+package id3;
+
+import java.lang.reflect.Method;
+import java.util.Collection;
+import java.util.Hashtable;
+import java.util.List;
+
+public class WorkingMemory {
+
+ private Hashtable<String, FactSet> factsets;
+
+ private Hashtable<String, Domain<?>> domainset;
+
+ public WorkingMemory() {
+ factsets = new Hashtable<String, FactSet>();
+ domainset = new Hashtable<String, Domain<?>>();
+ }
+
+ public void insert(Object element) {
+ String element_class = element.getClass().getName();
+ //System.out.println("Get the keys:"+ factsets.keys());
+ //System.out.println("WorkingMemory.get class "+ element_class + " exist? "+ factsets.containsKey(element_class));
+
+ OOFactSet fs;
+ if (!factsets.containsKey(element_class))
+ fs = create_factset(element);
+ else
+ fs = (OOFactSet) factsets.get(element_class);//TODO should i cast
+
+ fs.insert(element);
+ System.out.println("WorkingMemory.insert(object) inserted element fs.size() "+ fs.getSize());
+ }
+
+ public void insert(String element, String name, String separator, List<Domain<?>> domains) {
+
+ FSFactSet fs;
+ if (!factsets.containsKey(name)) {
+ fs = new FSFactSet(name, domains);
+ for (Domain<?> d: domains) {
+ fs.addDomain(d.getName(), d);
+ if (domainset.containsKey(d.getName()) || domainset.contains(d)) {
+ System.out.println("WorkingMemory.insert Already exist domain bla????? name: "+name+ " domain: "+d.getName());
+ System.exit(0);
+ } else
+ domainset.put(d.getName(), d);
+ }
+ factsets.put(name, fs);
+ } else
+ fs = (FSFactSet) factsets.get(name);//TODO should i cast
+
+ fs.insert(element, domains, separator);
+ //System.out.println("WorkingMemory.insert(string) inserted element fs.size() "+ fs.getSize());
+ }
+
+// public void insert(FactSet fs) {
+// System.out.println("factset : "+ fs.getSize());
+// if (!factsets.containsKey(fs.getClassName())) {
+// for (Domain<?> d : fs.getDomains()) {
+// System.out.println("Domain"+ d.getName());
+// if (domainset.containsKey(d.getName()) || domainset.contains(d))
+// System.out.println("Already exist domain bla?????");
+// else
+// domainset.put(d.getName(), d);
+//
+// //System.out.println("WorkingMemory.create_factset field "+ field + " fielddomain name "+fieldDomain.getName()+" return_type_name: "+return_type_name+".");
+//
+//
+// }
+// factsets.put(fs.getClassName(), fs);
+// } else {
+// System.out.println("Already exist bla?????");
+// }
+// }
+
+
+ /* factset workingmemory.createnew_factset(class)
+ * => instead of the class i have to pass the object itself because i am going to invoke the method
+ * => no actually i will not invoke
+ * factset newfs = new newfactset(class)
+ * foreach field in class
+ * domain d = domainset_hashtable[field]
+ * if d == null
+ * d = createnew_domain(field)
+ * newfs.adddomain(d)=> why do you add this the factset?
+ * we said that the domains should be independent from the factset
+ */
+ private OOFactSet create_factset(Object element) {
+ //System.out.println("WorkingMemory.create_factset element "+ element );
+
+ Class<?> element_class = element.getClass();
+ OOFactSet newfs = new OOFactSet(element_class);
+
+ Method [] element_methods = element_class.getDeclaredMethods();
+ for( Method m: element_methods) {
+
+
+ String m_name = m.getName();
+ String return_type_name = m.getReturnType().getName();
+ //System.out.println("WorkingMemory.create_factset m "+ m + " method name "+m_name+" return_type_name: "+return_type_name+".");
+ if (Util.isGetter(m_name) & Util.isSimpleType(return_type_name)) {
+ String field = Util.getAttributeName(m_name);
+ /*
+ * when u first read the element
+ * if the domain specifications are already given
+ * then read from there and
+ * dont add each new value you read, just check if it is valid
+ * otherwise you create a new domain for that attribute
+ * Domain attributeSpec = dataSetSpec.getDomain(attr_name);
+ */
+ Domain<?> fieldDomain;
+ if (!domainset.containsKey(field))
+ fieldDomain = DomainFactory.createDomainFromClass(m.getReturnType(), field);
+ else
+ fieldDomain = domainset.get(field);
+
+ //System.out.println("WorkingMemory.create_factset field "+ field + " fielddomain name "+fieldDomain.getName()+" return_type_name: "+return_type_name+".");
+
+ domainset.put(field, fieldDomain);
+ newfs.addDomain(field, fieldDomain);
+
+ //System.out.println("START: WorkingMemory.create_factset domainset size "+ domainset.size() + " newfs size "+newfs.getFacts().size()+".");
+
+ }
+ }
+
+ factsets.put(element_class.getName(), newfs);
+ return newfs;
+ }
+
+ /* TODO: iterator */
+ public Collection<FactSet> getFactsets() {
+ return factsets.values();
+ }
+
+ public Domain<?> getDomain(String field) {
+ return domainset.get(field);
+ }
+
+ public boolean containsDomainKey(String field) {
+ return domainset.containsKey(field);
+ }
+
+ public void putDomain(String field, Domain<?> fieldDomain) {
+ this.domainset.put(field, fieldDomain);
+
+ }
+
+ public void putFactSet(String klass_name, FactSet newfs) {
+ factsets.put(klass_name, newfs);
+ }
+
+ public boolean containsFactSetKey(String field) {
+ return factsets.containsKey(field);
+ }
+}
+
+
+/*
+workingmemory.insert(object)
+
+ factset fs = factsets_hashtable[object.class]
+ if fs == null
+ fs = createnew_factset(object.class);
+ fs.insert(object)
+
+
+factset workingmemory.createnew_factset(class)
+
+ factset newfs = new newfactset(class)
+ foreach field in class
+ domain d = domainset_hashtable[field]
+ if d == null
+ d = createnew_domain(field)
+ newfs.adddomain(d)
+
+
+factset.insert(object)
+
+ fact f;
+ foreach field in object
+ domain d = domainset_hashtable[field];
+ attribute attr = d.createattribute(field.value)
+ f.add(attr)
+ addfact(f)
+
+
+treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+
+
+*/
\ No newline at end of file
More information about the jboss-svn-commits
mailing list