[rules-users] wordnet in drools
Mark Proctor
mproctor at codehaus.org
Tue Jul 8 17:16:00 EDT 2008
When I get the chance I'll look over it and see how we can improve
performance.
Would you like to write a blog post about this work? We'll put it up at
http://blog.athico.com
Mark
Paul Fodor wrote:
>>> I am new to Drools and I wonder if anyone has used WordNet from Drools.
>>> Basically, I want to make some simple joins, such as finding words that
>>> are in the same synset, all hypernyms of a word, hyponyms, meronyms of
>>> verbs, adjectives, etc.
>>>
>
>
>> I haven't heard of it being applied with Drools. Do let me know your
>> findings if you produce anything interesting.
>>
>
> Hi Mark,
> We couldn't find any implementation of WordNet for Drools, so we
> wrote one. Anyone, please feel free to use it (attached
> below). If you want some additional functions, feel free to let me
> know. It is an interface to WordNet for Drools using a SQL database
> (MySQL). The computation times are pretty good compared with other
> Java-based rule engines, but an order of magnitude slower than C-based
> Prolog systems. I wonder whether this is an indexing problem for Java
> objects (we create about 300,000 instances of the same class to store
> the WordNet ontology). We can also consult the database directly
> (without putting the WordNet ontology in the JVM), but that is slower
> than having all objects in main memory (besides, the same tests for the
> Prolog systems had the whole ontology in main memory, so we want
> to do the same for Drools).
> Regards,
> Paul Fodor
>
> WordNetInterface_rules.drl:
>
> package drools
>
> import drools.WordNetInterface.S;
> import drools.WordNetInterface.G;
> import drools.WordNetInterface.Hyp;
> import drools.WordNetInterface.Mm;
> import drools.WordNetInterface.Ent;
> import drools.WordNetInterface.Sim;
> import drools.WordNetInterface.Ant;
> import drools.WordNetInterface.Reach;
>
> // Pairs every word with each different word that shares its synset_id.
> rule "CoordinateTerms"
>     salience 10
>     when
>         S( $synset : synset_id, $source : word )
>         S( synset_id == $synset, word != $source, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Links every word to the gloss stored under the same synset_id.
> rule "testAllGlosses"
>     salience 10
>     when
>         S( $synset : synset_id, $word : word )
>         G( synset_id == $synset, $gloss : gloss )
>     then
>         // Debug aid: System.out.println( "Reach " + $word + "," + $gloss );
>         insert( new Reach( $word, $gloss ) );
> end
>
> // Follows each Hyp fact from synset_id1 to synset_id2 and links the
> // words of the first synset to the words of the second.
> rule "testAllHypernyms"
>     salience 10
>     when
>         S( $from : synset_id, $source : word )
>         Hyp( synset_id1 == $from, $to : synset_id2 )
>         S( synset_id == $to, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Follows each Hyp fact backwards, from synset_id2 to synset_id1, and
> // links the words of the starting synset to the words it reaches.
> rule "testAllHyponyms"
>     salience 10
>     when
>         S( $from : synset_id, $source : word )
>         Hyp( synset_id2 == $from, $to : synset_id1 )
>         S( synset_id == $to, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Follows each Mm fact backwards, from synset_id2 to synset_id1, and
> // links the words of the starting synset to the words it reaches.
> rule "testAllMeronyms"
>     salience 10
>     when
>         S( $from : synset_id, $source : word )
>         Mm( synset_id2 == $from, $to : synset_id1 )
>         S( synset_id == $to, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Follows each Mm fact from synset_id1 to synset_id2 and links the
> // words of the first synset to the words of the second.
> rule "testAllHolonyms"
>     salience 10
>     when
>         S( $from : synset_id, $source : word )
>         Mm( synset_id1 == $from, $to : synset_id2 )
>         S( synset_id == $to, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Follows each Ent fact from synset_id1 to synset_id2 and links the
> // words of the first synset to the words of the second.
> rule "testAllTroponyms"
>     salience 10
>     when
>         S( $from : synset_id, $source : word )
>         Ent( synset_id1 == $from, $to : synset_id2 )
>         S( synset_id == $to, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Follows each Sim fact from synset_id1 to synset_id2 and links the
> // words of the first synset to the words of the second.
> rule "testAllSimilars"
>     salience 10
>     when
>         S( $from : synset_id, $source : word )
>         Sim( synset_id1 == $from, $to : synset_id2 )
>         S( synset_id == $to, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> // Follows each Ant fact from (synset_id1, w_num1) to (synset_id2, w_num2).
> // Unlike the synset-level rules, both ends must also match on w_num, so
> // only the exact word senses named by the Ant fact are linked.
> rule "testAllAntonyms"
>     salience 10
>     when
>         S( $from : synset_id, $fromNum : w_num, $source : word )
>         Ant( synset_id1 == $from, w_num1 == $fromNum,
>              $to : synset_id2, $toNum : w_num2 )
>         S( synset_id == $to, w_num == $toNum, $target : word )
>     then
>         // Debug aid: System.out.println( "Reach " + $source + "," + $target );
>         insert( new Reach( $source, $target ) );
> end
>
> WordNetInterface.java:
>
> package drools;
> import java.io.InputStreamReader;
> import java.sql.Connection;
> import java.sql.DriverManager;
> import java.sql.ResultSet;
> import java.sql.Statement;
>
> import org.drools.RuleBase;
> import org.drools.RuleBaseFactory;
> import org.drools.StatefulSession;
> import org.drools.audit.WorkingMemoryFileLogger;
> import org.drools.base.RuleNameEqualsAgendaFilter;
> import org.drools.compiler.PackageBuilder;
> import org.drools.compiler.PackageBuilderConfiguration;
> import org.drools.spi.AgendaFilter;
>
> public class WordNetInterface {
> public static void main(final String[] args) throws Exception {
> int test = 1; // 1-CoordinateTerms, 2-testAllGlosses,
> 3-testAllHypernyms, 4-testAllHyponyms, 5-testAllMeronyms,
> 6-testAllHolonyms, 7-testAllTroponyms, 8-testAllSimilars,
> 9-testAllAntonyms
> String testS;
> switch(test){
> case 1: testS = "CoordinateTerms";break;
> case 2: testS = "testAllGlosses";break;
> case 3: testS = "testAllHypernyms";break;
> case 4: testS = "testAllHyponyms";break;
> case 5: testS = "testAllMeronyms";break;
> case 6: testS = "testAllHolonyms";break;
> case 7: testS = "testAllTroponyms";break;
> case 8: testS = "testAllSimilars";break;
> case 9: testS = "testAllAntonyms";break;
> default: testS = "empty";
> }
>
> PackageBuilderConfiguration conf = new PackageBuilderConfiguration();
> final PackageBuilder builder = new PackageBuilder( conf );
> builder.addPackageFromDrl( new InputStreamReader(
> WordNetInterface.class.getResourceAsStream(
> "WordNetInterface_rules.drl" ) ) );
> final RuleBase ruleBase = RuleBaseFactory.newRuleBase();
> ruleBase.addPackage( builder.getPackage() );
> final StatefulSession session = ruleBase.newStatefulSession();
> final WorkingMemoryFileLogger logger = new
> WorkingMemoryFileLogger( session );
> logger.setFileName( "log/coordinateTerms" );
>
> // import WordNet data from the MySql database
> Connection conn = null;
> try{
> String userName = "xsb";
> String password = "";
> String url = "jdbc:mysql://localhost/test";
> Class.forName ("com.mysql.jdbc.Driver").newInstance ();
> conn = DriverManager.getConnection (url, userName, password);
> System.out.println ("Database connection established");
> }catch (Exception e){ System.err.println ("Cannot connect to
> database server");}
>
> Statement stat1 = conn.createStatement ();
> stat1.executeQuery ("SELECT
> Synset_id,W_num,Word,Ss_type,Sense_number,Tag_count FROM s");
> ResultSet rs1 = stat1.getResultSet ();
> S s;
> while (rs1.next ()){
> s = new
> S(rs1.getInt("Synset_id"),rs1.getInt("W_num"),rs1.getString("Word"),rs1.getInt("Ss_type"),rs1.getInt("Sense_number"),rs1.getInt("Tag_count"));
> session.insert( s );
> }
> rs1.close ();
> stat1.close ();
>
> switch(test){
> case 1: // CoordinateTerms
> break;
> case 2: // testAllGlosses
> Statement stat2 = conn.createStatement ();
> stat2.executeQuery ("SELECT synset_id,gloss FROM g");
> ResultSet rs2 = stat2.getResultSet ();
> G g;
> while (rs2.next ()){
> g = new G(rs2.getInt("synset_id"),rs2.getString("gloss"));
> session.insert( g );
> }
> rs2.close ();
> stat2.close ();
> break;
> case 3: // testAllHypernyms
> case 4: // testAllHyponyms
> Statement stat3 = conn.createStatement ();
> stat3.executeQuery ("SELECT synset_id1,synset_id2 FROM hyp");
> ResultSet rs3 = stat3.getResultSet ();
> Hyp hyp;
> while (rs3.next ()){
> hyp = new Hyp(rs3.getInt("synset_id1"),rs3.getInt("synset_id2"));
> session.insert( hyp );
> }
> rs3.close ();
> stat3.close ();
> break;
> case 5: // testAllMeronyms
> case 6: // testAllHolonyms
> Statement stat4 = conn.createStatement ();
> stat4.executeQuery ("SELECT synset_id1,synset_id2 FROM mm");
> ResultSet rs4 = stat4.getResultSet ();
> Mm mm;
> while (rs4.next ()){
> mm = new Mm(rs4.getInt("synset_id1"),rs4.getInt("synset_id2"));
> session.insert( mm );
> }
> rs4.close ();
> stat4.close ();
> break;
> case 7: // testAllTroponyms
> Statement stat5 = conn.createStatement ();
> stat5.executeQuery ("SELECT synset_id1,synset_id2 FROM ent");
> ResultSet rs5 = stat5.getResultSet ();
> Ent ent;
> while (rs5.next ()){
> ent = new Ent(rs5.getInt("synset_id1"),rs5.getInt("synset_id2"));
> session.insert( ent );
> }
> rs5.close ();
> stat5.close ();
> break;
> case 8: // testAllSimilars
> Statement stat6 = conn.createStatement ();
> stat6.executeQuery ("SELECT synset_id1,synset_id2 FROM sim");
> ResultSet rs6 = stat6.getResultSet ();
> Sim sim;
> while (rs6.next ()){
> sim = new Sim(rs6.getInt("synset_id1"),rs6.getInt("synset_id2"));
> session.insert( sim );
> }
> rs6.close ();
> stat6.close ();
> break;
> case 9: // testAllAntonyms
> Statement stat7 = conn.createStatement ();
> stat7.executeQuery ("SELECT
> synset_id1,w_num1,synset_id2,w_num2 FROM ant");
> ResultSet rs7 = stat7.getResultSet ();
> Ant ant;
> while (rs7.next ()){
> ant = new
> Ant(rs7.getInt("synset_id1"),rs7.getInt("w_num1"),rs7.getInt("synset_id2"),rs7.getInt("w_num2"));
> session.insert( ant );
> }
> rs7.close ();
> stat7.close ();
> break;
> default: //empty"
> System.out.println("not a valid test");
> }
>
> // close database
> if (conn != null){
> try{
> conn.close ();
> System.out.println ("Database connection terminated");
> }
> catch (Exception e) { /* ignore close errors */ }
> }
>
> // query Drools
> long t1 = System.currentTimeMillis();
> AgendaFilter rnf = new RuleNameEqualsAgendaFilter(testS);
> session.fireAllRules(rnf);
> long t2 = System.currentTimeMillis();
> System.out.println("Time elapsed in seconds is : " + ((t2 -
> t1) / 1000.0));
>
> logger.writeToDisk();
> session.dispose();
> }
>
> /**
>  * Fact holding one row of the <code>s</code> table (columns Synset_id,
>  * W_num, Word, Ss_type, Sense_number, Tag_count). The rules join on
>  * synset_id, w_num and word through the getters below.
>  */
> public static class S {
>     private int synset_id;
>     private int w_num;
>     private String word;
>     private int ss_type;
>     private int sense_number;
>     private int tag_count;
>
>     /** Creates a fact whose numeric fields are 0 and whose word is null. */
>     public S() {
>     }
>
>     /** Creates a fact from the six column values of one row. */
>     public S(int synsetId, int wordNumber, String text, int synsetType,
>              int senseNumber, int tagCount) {
>         synset_id = synsetId;
>         w_num = wordNumber;
>         word = text;
>         ss_type = synsetType;
>         sense_number = senseNumber;
>         tag_count = tagCount;
>     }
>
>     public int getSynset_id() {
>         return synset_id;
>     }
>
>     public int getW_num() {
>         return w_num;
>     }
>
>     public String getWord() {
>         return word;
>     }
>
>     public int getSs_type() {
>         return ss_type;
>     }
>
>     public int getSense_number() {
>         return sense_number;
>     }
>
>     public int getTag_count() {
>         return tag_count;
>     }
> }
>
> /**
>  * Fact holding one row of the <code>g</code> table: a synset_id and the
>  * gloss text stored for it.
>  */
> public static class G {
>     private int synset_id;
>     private String gloss;
>
>     /** Creates a fact with synset_id 0 and a null gloss. */
>     public G() {
>     }
>
>     /** Creates a fact from the two column values of one row. */
>     public G(int synsetId, String text) {
>         synset_id = synsetId;
>         gloss = text;
>     }
>
>     public int getSynset_id() {
>         return synset_id;
>     }
>
>     public String getGloss() {
>         return gloss;
>     }
> }
>
> /**
>  * Fact holding one row of the <code>hyp</code> table: a pair of synset ids.
>  * Used by the testAllHypernyms and testAllHyponyms rules.
>  */
> public static class Hyp {
>     private int synset_id1;
>     private int synset_id2;
>
>     /** Creates a fact with both ids set to 0. */
>     public Hyp() {
>     }
>
>     /** Creates a fact linking the first synset id to the second. */
>     public Hyp(int first, int second) {
>         synset_id1 = first;
>         synset_id2 = second;
>     }
>
>     public int getSynset_id1() {
>         return synset_id1;
>     }
>
>     public int getSynset_id2() {
>         return synset_id2;
>     }
> }
>
> /**
>  * Fact holding one row of the <code>mm</code> table: a pair of synset ids.
>  * Used by the testAllMeronyms and testAllHolonyms rules.
>  */
> public static class Mm {
>     private int synset_id1;
>     private int synset_id2;
>
>     /** Creates a fact with both ids set to 0. */
>     public Mm() {
>     }
>
>     /** Creates a fact linking the first synset id to the second. */
>     public Mm(int first, int second) {
>         synset_id1 = first;
>         synset_id2 = second;
>     }
>
>     public int getSynset_id1() {
>         return synset_id1;
>     }
>
>     public int getSynset_id2() {
>         return synset_id2;
>     }
> }
>
> /**
>  * Fact holding one row of the <code>ent</code> table: a pair of synset ids.
>  * Used by the testAllTroponyms rule.
>  */
> public static class Ent {
>     private int synset_id1;
>     private int synset_id2;
>
>     /** Creates a fact with both ids set to 0. */
>     public Ent() {
>     }
>
>     /** Creates a fact linking the first synset id to the second. */
>     public Ent(int first, int second) {
>         synset_id1 = first;
>         synset_id2 = second;
>     }
>
>     public int getSynset_id1() {
>         return synset_id1;
>     }
>
>     public int getSynset_id2() {
>         return synset_id2;
>     }
> }
>
> /**
>  * Fact holding one row of the <code>sim</code> table: a pair of synset ids.
>  * Used by the testAllSimilars rule.
>  */
> public static class Sim {
>     private int synset_id1;
>     private int synset_id2;
>
>     /** Creates a fact with both ids set to 0. */
>     public Sim() {
>     }
>
>     /** Creates a fact linking the first synset id to the second. */
>     public Sim(int first, int second) {
>         synset_id1 = first;
>         synset_id2 = second;
>     }
>
>     public int getSynset_id1() {
>         return synset_id1;
>     }
>
>     public int getSynset_id2() {
>         return synset_id2;
>     }
> }
>
> /**
>  * Fact holding one row of the <code>ant</code> table: two
>  * (synset id, word number) pairs. Used by the testAllAntonyms rule, which
>  * matches both the synset id and the word number on each side.
>  */
> public static class Ant {
>     private int synset_id1;
>     private int w_num1;
>     private int synset_id2;
>     private int w_num2;
>
>     /** Creates a fact with all four values set to 0. */
>     public Ant() {
>     }
>
>     /** Creates a fact linking (firstSynset, firstWord) to (secondSynset, secondWord). */
>     public Ant(int firstSynset, int firstWord, int secondSynset, int secondWord) {
>         synset_id1 = firstSynset;
>         w_num1 = firstWord;
>         synset_id2 = secondSynset;
>         w_num2 = secondWord;
>     }
>
>     public int getSynset_id1() {
>         return synset_id1;
>     }
>
>     public int getW_num1() {
>         return w_num1;
>     }
>
>     public int getSynset_id2() {
>         return synset_id2;
>     }
>
>     public int getW_num2() {
>         return w_num2;
>     }
> }
>
>
> /**
>  * Result fact inserted by every rule: a (source, target) pair of strings.
>  * The target is a word for most rules and a gloss for testAllGlosses.
>  */
> public static class Reach {
>     private String source;
>     private String target;
>
>     /** Creates a fact with null source and target. */
>     public Reach() {
>     }
>
>     /** Creates a fact stating that <code>to</code> was reached from <code>from</code>. */
>     public Reach(String from, String to) {
>         source = from;
>         target = to;
>     }
>
>     public String getSource() {
>         return source;
>     }
>
>     public void setSource(String value) {
>         source = value;
>     }
>
>     public String getTarget() {
>         return target;
>     }
>
>     public void setTarget(String value) {
>         target = value;
>     }
> }
> }
> _______________________________________________
> rules-users mailing list
> rules-users at lists.jboss.org
> https://lists.jboss.org/mailman/listinfo/rules-users
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.jboss.org/pipermail/rules-users/attachments/20080708/3c27d41a/attachment.html
More information about the rules-users
mailing list