OK, I committed a first version of udp-largecluster.xml [1]. I recommend
using it instead of the config you posted below.
Comments regarding your config below:
- UDP.max_bundle_size of 4MB won't work ! Datagram packets can only be
65535 bytes max, so any packet larger than that would get dropped !
- UDP: the thread pools' max sizes are too small, I increased them. Note
that idle threads will be reaped
- PING.timeout= 360 secs ? A node will potentially block for 360 secs ?
- PING.break_on_coord_rsp is false ? Don't you want to return when the
coordinator has sent you the discovery response ?
- I changed UNICAST to UNICAST2; the latter has no acks for every message
- Note that STABLE.max_bytes is automatically adjusted if
ergonomics=true, based on the cluster size
- GMS.join_timeout = 60 secs ?
- I increased GMS.max_bundling_time (from 50ms) for view bundling; this
handles concurrent connects much better
- FRAG2.frag_size == 2MB ? This has to be smaller than
UDP.max_bundle_size. I suggest 60K (see [1])
- Remove FLUSH; FLUSH can be used in clusters up to 20-30 nodes, but
won't scale to larger clusters
[1]
https://github.com/belaban/JGroups/blob/master/conf/udp-largecluster.xml
On 3/22/11 2:05 AM, Dave wrote:
I switched back to UDP today based on your feedback. Our config resembles
the config below. Like I said, we just increased sizes and timeouts. If you
ask me why I tweaked a certain parameter, my response would be that it seemed
like a good idea based on the JGroups documentation. UDP seemed a little
more problematic than TCP; not sure why, though.
<config xmlns="urn:org:jgroups"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="urn:org:jgroups file:schema/JGroups-2.8.xsd">

    <!--
      JGroups protocol stack as posted by Dave (UDP transport, large cluster).
      Values are kept as posted so that the review remarks in the reply at the
      top of this message still apply; the comments here only restate those
      remarks next to the element they concern. The one repaired value is
      UFC.max_credits (see the note on that element).
    -->

    <!--
      Review: max_bundle_size of 4000000 will not work, because a datagram
      packet can be at most 65535 bytes and anything larger is dropped.
      Review: the thread pool max sizes are considered too small; idle threads
      are reaped, so larger maxima are recommended.
    -->
    <UDP
        mcast_addr="${jgroups.udp.mcast_addr:228.6.7.8}"
        mcast_port="${jgroups.udp.mcast_port:46655}"
        tos="8"
        ucast_recv_buf_size="20000000"
        ucast_send_buf_size="640000"
        mcast_recv_buf_size="25000000"
        mcast_send_buf_size="640000"
        loopback="true"
        discard_incompatible_packets="true"
        max_bundle_size="4000000"
        max_bundle_timeout="30"
        ip_ttl="${jgroups.udp.ip_ttl:2}"
        enable_bundling="true"
        enable_diagnostics="false"
        thread_naming_pattern="pl"
        thread_pool.enabled="true"
        thread_pool.min_threads="2"
        thread_pool.max_threads="30"
        thread_pool.keep_alive_time="5000"
        thread_pool.queue_enabled="true"
        thread_pool.queue_max_size="1000"
        thread_pool.rejection_policy="Discard"
        oob_thread_pool.enabled="true"
        oob_thread_pool.min_threads="2"
        oob_thread_pool.max_threads="30"
        oob_thread_pool.keep_alive_time="5000"
        oob_thread_pool.queue_enabled="true"
        oob_thread_pool.queue_max_size="1000"
        oob_thread_pool.rejection_policy="Discard"
    />

    <!--
      Review: a timeout of 360000 ms means a node may block for 360 seconds
      during discovery. break_on_coord_rsp="false" is questioned as well:
      discovery could return once the coordinator has responded.
    -->
    <PING timeout="360000" num_initial_members="400"
          break_on_coord_rsp="false"/>
    <MERGE2 max_interval="30000" min_interval="10000"/>
    <FD_SOCK/>
    <FD_ALL/>
    <BARRIER />
    <pbcast.NAKACK use_stats_for_retransmission="false"
                   exponential_backoff="0"
                   use_mcast_xmit="true" gc_lag="0"
                   retransmit_timeout="300,600,1200,2400,3600,4800"
                   discard_delivered_msgs="true"/>

    <!-- Review: the reply replaces UNICAST with UNICAST2, which does not ack every message. -->
    <UNICAST timeout="300,600,1200,2400,3600,4800"/>

    <!-- Review: max_bytes is adjusted automatically from the cluster size when ergonomics=true. -->
    <pbcast.STABLE stability_delay="1000"
                   desired_avg_gossip="50000"
                   max_bytes="1000000"/>

    <!--
      Review: join_timeout of 60000 ms is questioned. The reply also raises
      max_bundling_time (from 50 ms) so that view bundling copes better with
      concurrent connects.
    -->
    <pbcast.GMS print_local_addr="false" join_timeout="60000"
                view_bundling="true" use_flush_if_present="false"/>

    <!--
      NOTE(review): this value read "2FRAG000" in the message text, which is
      not a number. It is restored to 2000000 on the assumption that it was
      meant to match MFC.max_credits below; confirm against the sender's
      actual configuration.
    -->
    <UFC max_credits="2000000" min_threshold="0.20"/>
    <MFC max_credits="2000000" min_threshold="0.20"/>

    <!-- Review: frag_size must be smaller than UDP.max_bundle_size; 60K is suggested. -->
    <FRAG2 frag_size="2000000" />
    <pbcast.STREAMING_STATE_TRANSFER/>
    <!--<pbcast.STATE_TRANSFER/> -->

    <!-- Review: remove FLUSH; it is usable up to roughly 20 to 30 nodes but does not scale beyond that. -->
    <pbcast.FLUSH timeout="0"/>
</config>
--
Bela Ban
Lead JGroups / Clustering Team
JBoss