]
Mikhail Krestyaninov commented on JGRP-1906:
--------------------------------------------
[~belaban] I'm currently facing the same problem running JGroups with Mesos and
Docker. Since these technologies are quite popular today, maybe it is worth reconsidering the
approach for cleaning up the database? E.g. active JGroups members could periodically update
their records (using a timestamp) and clean up old ones.
jdbc_ping doesn't delete crashed node
-------------------------------------
Key: JGRP-1906
URL:
https://issues.jboss.org/browse/JGRP-1906
Project: JGroups
Issue Type: Bug
Affects Versions: 3.6.1
Environment: n/a
Reporter: rama rama
Assignee: Bela Ban
Priority: Critical
Hi,
I am having trouble with JGroups. If a node crashes, it doesn't get removed from the JDBC
table.
After 10 hours of work, the table contains 100 rows (that in itself is not a problem), but when the
server starts up, GMS takes _AGES_ trying to connect to all these dead nodes.
Since I am only a 'user' of JGroups and have no idea how
it works internally: is this normal?
I don't think so: the master of the cluster (or a governor or something like that) should
be able to detect whether a node is present or not (via FD, VERIFY_SUSPECT) and delete it from
the JDBC table to avoid issues on the next startup. Correct me if I am wrong.
Here is a copy/paste of my current config, which is set up programmatically.
------------------------------------------------------------------------------------
int min_cores = 1;
int max_cores = 50;
InetAddress bind_addr =
org.jgroups.util.Util.getAddressByPatternMatch("match-address:" +
Config.get("Cluster.bind_addr"));
stack
.addProtocol(new UDP()
.setValue("bind_addr", bind_addr)
.setValue("loopback", true)
.setValue("thread_naming_pattern",
"cl")
.setValue("timer_type", "new3")
.setValue("timer_min_threads", min_cores)
.setValue("timer_max_threads", max_cores)
.setValue("timer_keep_alive_time", Util.MIN *
10)
.setValue("timer_queue_max_size", 500)
.setValue("mcast_port",
Config.get("Cluster.mcast_port", Constant.INT_NULL))
.setValue("thread_pool_keep_alive_time",
Util.MIN * 10)
.setValue("thread_pool_min_threads",
min_cores)
.setValue("thread_pool_max_threads",
max_cores)
.setValue("thread_pool_queue_enabled", false)
.setValue("thread_pool_queue_max_size", 500)
.setValue("oob_thread_pool_keep_alive_time",
Util.MIN * 10)
.setValue("oob_thread_pool_min_threads",
min_cores)
.setValue("oob_thread_pool_max_threads",
max_cores)
.setValue("oob_thread_pool_queue_enabled",
false)
.setValue("oob_thread_pool_queue_max_size",
500)
.setValue("internal_thread_pool_keep_alive_time", Util.MIN * 10)
.setValue("internal_thread_pool_min_threads",
min_cores)
.setValue("internal_thread_pool_max_threads",
max_cores)
.setValue("internal_thread_pool_queue_enabled",
false)
.setValue("internal_thread_pool_queue_max_size", 600000)
.setValue("ucast_recv_buf_size",
org.jgroups.util.Util.readBytesInteger("5M"))
.setValue("ucast_send_buf_size",
org.jgroups.util.Util.readBytesInteger("640K"))
.setValue("mcast_recv_buf_size",
org.jgroups.util.Util.readBytesInteger("5M"))
.setValue("mcast_send_buf_size",
org.jgroups.util.Util.readBytesInteger("640K"))
)
.addProtocol(new JDBC_PING()
.setValue("connection_url",
Config.get("DB.dbdriver") + ':' + Config.get("DB.dburl") +
'/' + Config.get("DB.dbname"))
.setValue("connection_username",
Config.get("DB.dbuser"))
.setValue("connection_password",
Config.get("DB.dbpwd"))
.setValue("connection_driver",
"org.postgresql.Driver")
.setValue("initialize_sql", "CREATE TABLE
IF NOT EXISTS JGROUPSPING ( own_addr varchar(200) NOT NULL, cluster_name varchar(200) NOT
NULL, ping_data bytea DEFAULT NULL, PRIMARY KEY (own_addr, cluster_name) )")
)
.addProtocol(new MERGE3()
.setValue("max_interval", 10000)
.setValue("min_interval", 1000)
)
.addProtocol(new FD_SOCK()
.setValue("bind_addr", bind_addr)
)
.addProtocol(new FD_ALL()
)
.addProtocol(new VERIFY_SUSPECT()
.setValue("timeout", 1000)
.setValue("bind_addr", bind_addr)
)
.addProtocol(new NAKACK2()
.setValue("xmit_interval", 500)
.setValue("xmit_table_num_rows", 100)
.setValue("xmit_table_msgs_per_row", 2000)
.setValue("xmit_table_max_compaction_time",
30000)
.setValue("max_msg_batch_size", 500)
.setValue("use_mcast_xmit", false)
.setValue("discard_delivered_msgs", true)
)
.addProtocol(new UNICAST3()
.setValue("xmit_table_num_rows", 100)
.setValue("xmit_table_msgs_per_row", 1000)
.setValue("xmit_table_max_compaction_time",
30000)
.setValue("max_msg_batch_size", 500)
)
.addProtocol(new STABLE()
.setValue("stability_delay", 2000)
.setValue("desired_avg_gossip", 60000)
.setValue("max_bytes",
org.jgroups.util.Util.readBytesInteger("4M"))
.setValue("cap", 0.1)
)
.addProtocol(new GMS()
.setValue("join_timeout", 3000)
.setValue("view_bundling", false)
.setValue("print_local_addr", false)
.setValue("print_physical_addrs", false)
)
.addProtocol(new UFC()
.setValue("max_credits",
org.jgroups.util.Util.readBytesInteger("4M"))
.setValue("min_threshold", 0.4)
)
.addProtocol(new MFC()
.setValue("max_credits",
org.jgroups.util.Util.readBytesInteger("4M"))
.setValue("min_threshold", 0.4)
)
.addProtocol(new FRAG2()
.setValue("frag_size",
org.jgroups.util.Util.readBytesInteger("60K"))
)
.addProtocol(new RSVP()
.setValue("resend_interval", 2000)
.setValue("timeout", 10000)
)
;
stack.init();
----------------------------