[jboss-user] [JBossCache] - Re: newbie question - ReplicationException

aditsu do-not-reply at jboss.com
Thu Jan 17 23:59:33 EST 2008


anonymous wrote : In your case though, perhaps a good approach may be for both caches to attempt to write the change, and if one cache fails to write, assume that this is because the other has completed the write and hence it would not be necessary? 

That seems like a terrible approach, because there's no guarantee that the other cache succeeded. In fact, as I will show, it's almost certain that it also failed (if they have the same timeout).

anonymous wrote : Both caches failing should almost certainly not happen with optimistic locking - let me know if it does and we can investigate that.

Well, with pessimistic locking I can say that if one cache fails then both fail, practically always.
With optimistic locking, they don't always fail together, but "just" most of the time. Also, the percentage of failures seems to be greater.

Here's the modified code:


  | package CacheRepl;
  | 
  | import java.text.DateFormat;
  | import java.text.SimpleDateFormat;
  | import java.util.Date;
  | import java.util.Timer;
  | import java.util.TimerTask;
  | 
  | import javax.transaction.UserTransaction;
  | 
  | import org.apache.log4j.Logger;
  | import org.jboss.cache.Cache;
  | import org.jboss.cache.DefaultCacheFactory;
  | import org.jboss.cache.Fqn;
  | import org.jboss.cache.transaction.DummyTransactionManager;
  | import org.jboss.cache.transaction.DummyUserTransaction;
  | 
  | public class CacheThread2 {
  | 	private static final Logger LOG = Logger.getLogger("test");
  | 	private static final int THREADS = 2;
  |     private static final int REPEAT = 20;
  |     protected static final int INTERVAL = 1000;
  |     
  |     protected int count = 0;
  |     protected final Timer timer = new Timer();
  |     protected final int threadId;
  | 	protected final Cache<Object, Object> cache;
  | 	protected final DateFormat df = new SimpleDateFormat("HH:mm:ss.SSS");
  | 	protected volatile boolean done = false;
  | 	protected final boolean[] results = new boolean[REPEAT];
  | 	
  | 	protected class CacheTask extends TimerTask {
  | 		@Override
  | 		public void run() {
  | 			final String t = df.format(new Date());
  | 			final boolean result = doTransaction();
  | 			System.out.println(t + " step " + count + " T" + threadId
  | 					+ (result ? " succeeded" : " failed"));
  | 			results[count++] = result;
  | 			if (count >= REPEAT) {
  | 				timer.cancel();
  | 				done = true;
  | 			}
  | 		}
  | 	}
  | 	
  | 	public CacheThread2(final int threadId) {
  | 		this.threadId = threadId;
  | 		cache = DefaultCacheFactory.getInstance().createCache("replSync-service.xml", true);
  |     }
  | 	
  | 	public void start(final Date time) {
  | 		timer.scheduleAtFixedRate(new CacheTask(), time, INTERVAL);
  | 	}
  | 	
  | 	protected boolean doTransaction() {
  |     	final UserTransaction tx = new DummyUserTransaction(DummyTransactionManager.getInstance());
  | 		try {
  | 			tx.begin();
  | 			cache.put(new Fqn<Object>("node"), "key", "value" + threadId);
  | 			tx.commit();
  | 		    return true;
  | 		} catch (Exception e) {
  | 			LOG.error("transaction failed", e);
  | 			try {
  | 				tx.rollback();
  | 			} catch (Exception e1) {
  | 				LOG.warn("rollback failed", e1);
  | 			}
  | 			return false;
  | 		}
  | 	}
  | 	
  | 	public static void main(String[] args) throws InterruptedException {
  | 		final CacheThread2[] threads = new CacheThread2[THREADS];
  | 		for (int t = 0; t < THREADS; t++) {
  | 			threads[t] = new CacheThread2(t);
  | 		}
  | 		Date time = new Date(System.currentTimeMillis() + 2000);
  | 		for (int t = 0; t < THREADS; t++) {
  | 			threads[t].start(time);
  | 		}
  | 		while (!threads[0].done || !threads[1].done) {
  | 			Thread.sleep(500);
  | 		}
  | 		final int[] stats = new int[4];
  | 		for (int j = 0; j < REPEAT; ++j) {
  | 			stats[(threads[0].results[j] ? 2 : 0) + (threads[1].results[j] ? 1 : 0)]++;
  | 		}
  | 		System.out.println("\nBoth failed: " + stats[0] + " times");
  | 		System.out.println("First one failed: " + stats[1] + " times");
  | 		System.out.println("Second one failed: " + stats[2] + " times");
  | 		System.out.println("Both succeeded: " + stats[3] + " times");
  | 	}
  | }
  | 

and cache configuration:


  | <server>
  |     <mbean code="org.jboss.cache.jmx.CacheJmxWrapper"
  |     	name="jboss.cache:service=TreeCache">
  | 
  |         <depends>jboss:service=Naming</depends>
  |         <depends>jboss:service=TransactionManager</depends>
  | 
  |       <attribute name="TransactionManagerLookupClass">org.jboss.cache.transaction.DummyTransactionManagerLookup
  |       </attribute>
  |       <!--
  |       	    Node locking scheme:
  |             OPTIMISTIC
  |             PESSIMISTIC (default)
  |       -->
  |       <attribute name="NodeLockingScheme">OPTIMISTIC</attribute>
  |         <!--
  |             Isolation level : SERIALIZABLE
  |                               REPEATABLE_READ (default)
  |                               READ_COMMITTED
  |                               READ_UNCOMMITTED
  |                               NONE
  |         -->
  |         <attribute name="IsolationLevel">READ_COMMITTED</attribute>
  | 
  |         <attribute name="CacheMode">REPL_SYNC</attribute>
  | 
  |         <attribute name="ClusterName">Test cache</attribute>
  | 
  |         <attribute name="ClusterConfig">
  | 			<config>
  | 				<UDP bind_addr="10.0.0.226"
  | 				    mcast_addr="228.10.10.10"
  | 					mcast_port="45588"
  | 					tos="8"
  | 					ucast_recv_buf_size="20000000"
  | 					ucast_send_buf_size="640000"
  | 					mcast_recv_buf_size="25000000"
  | 					mcast_send_buf_size="640000"
  | 					loopback="false"
  | 					discard_incompatible_packets="true"
  | 					max_bundle_size="64000"
  | 					max_bundle_timeout="30"
  | 					use_incoming_packet_handler="true"
  | 					ip_ttl="2"
  | 					enable_bundling="false"
  | 					enable_diagnostics="true"
  | 					use_concurrent_stack="true"
  | 					thread_naming_pattern="pl"
  | 					thread_pool.enabled="true"
  | 					thread_pool.min_threads="1"
  | 					thread_pool.max_threads="25"
  | 					thread_pool.keep_alive_time="30000"
  | 					thread_pool.queue_enabled="true"
  | 					thread_pool.queue_max_size="10"
  | 					thread_pool.rejection_policy="Run"
  | 					oob_thread_pool.enabled="true"
  | 					oob_thread_pool.min_threads="1"
  | 					oob_thread_pool.max_threads="4"
  | 					oob_thread_pool.keep_alive_time="10000"
  | 					oob_thread_pool.queue_enabled="true"
  | 					oob_thread_pool.queue_max_size="10"
  | 					oob_thread_pool.rejection_policy="Run"/>
  | 				<PING timeout="2000" num_initial_members="3"/>
  | 				<MERGE2 max_interval="30000" min_interval="10000"/>
  | 				<FD_SOCK/>
  | 				<FD timeout="10000" max_tries="5" shun="true"/>
  | 				<VERIFY_SUSPECT timeout="1500"/>
  | 				<pbcast.NAKACK max_xmit_size="60000"
  | 					use_mcast_xmit="false" gc_lag="0"
  | 					retransmit_timeout="300,600,1200,2400,4800"
  | 					discard_delivered_msgs="true"/>
  | 				<UNICAST timeout="300,600,1200,2400,3600"/>
  | 				<pbcast.STABLE stability_delay="1000" desired_avg_gossip="50000"
  | 					max_bytes="400000"/>
  | 				<pbcast.GMS print_local_addr="true" join_timeout="5000"
  | 					join_retry_timeout="2000" shun="false"
  | 					view_bundling="true" view_ack_collection_timeout="5000"/>
  | 				<FRAG2 frag_size="60000"/>
  | 				<pbcast.STREAMING_STATE_TRANSFER use_reading_thread="true"/>
  | 				<!-- <pbcast.STATE_TRANSFER/> -->
  | 				<pbcast.FLUSH timeout="0"/>
  | 			</config>
  | 		</attribute>
  | 
  |         <attribute name="FetchInMemoryState">true</attribute>
  |         <attribute name="StateRetrievalTimeout">15000</attribute>
  |         <attribute name="SyncReplTimeout">15000</attribute>
  |         <attribute name="LockAcquisitionTimeout">500</attribute>
  |         
  |         <attribute name="UseRegionBasedMarshalling">true</attribute>
  |         <attribute name="TransactionTimeout">300</attribute>
  |     </mbean>
  | 
  | </server>
  | 

Note that LockAcquisitionTimeout (500 ms in this configuration) should be smaller than INTERVAL (1000 ms) in the code.

Here's an example result with PESSIMISTIC locking:


  | Both failed: 15 times
  | First one failed: 0 times
  | Second one failed: 0 times
  | Both succeeded: 5 times
  | 

and with OPTIMISTIC locking:


  | Both failed: 15 times
  | First one failed: 2 times
  | Second one failed: 1 times
  | Both succeeded: 2 times
  | 

What do you think?

Thanks
Adrian

View the original post : http://www.jboss.com/index.html?module=bb&op=viewtopic&p=4121161#4121161

Reply to the post : http://www.jboss.com/index.html?module=bb&op=posting&mode=reply&p=4121161



More information about the jboss-user mailing list