[jboss-user] [Clustering/JBoss] - Strange Memory usage in cluster

MarcusDidiusFalco do-not-reply at jboss.com
Thu Apr 17 11:26:45 EDT 2008


Hallo
I am trying to analyse a legacy application that runs on JBoss 3.2.7 and suffers from frequent out-of-memory errors.
The application extensively uses the TreeCache for caching. This TreeCache is the only part of the application which is replicated across the nodes.
To analyse the impact of the replication on the heap, we ran a load test against two clustered nodes. Node 1 received all the load; no requests were targeted at Node 2. During the load test, heap memory usage on Node 1 went up to a maximum of 300 MB. On Node 2, which received no user requests, heap memory usage went up to 900 MB.
I guess something is fundamentally misconfigured. 

This is the treecache-service.xml used by the application:

<?xml version="1.0" encoding="UTF-8"?>
  | 
  | <!-- ===================================================================== -->
  | <!--                                                                       -->
  | <!--  TreeCache Configuration                              -->
  | <!--                                                                       -->
  | <!-- ===================================================================== -->
  | 
  | <server>
  | 
  |   <classpath codebase="lib" archives="jboss-cache.jar, jgroups.jar"/>
  | 
  |   <!-- ==================================================================== -->
  |   <!-- Distributed TreeCache                                   -->
  |   <!-- ==================================================================== -->
  | 
  |   <mbean code="org.jboss.cache.TreeCache" name="jboss.cache:service=TreeCache">
  |     <depends>jboss:service=Naming</depends>
  | 
  |     <!-- Disabled: dependency on the TransactionManager service.
  |     <depends>jboss:service=TransactionManager</depends>
  |     -->
  | 
  |     <!-- Disabled: TransactionManager lookup class.
  |     <attribute name="TransactionManagerLookupClass">org.jboss.cache.DummyTransactionManagerLookup</attribute>
  |     -->
  | 
  |     <!-- Disabled: node locking (isolation) level. Possible values are
  |          SERIALIZABLE, REPEATABLE_READ (default), READ_COMMITTED,
  |          READ_UNCOMMITTED and NONE.
  |     <attribute name="IsolationLevel">REPEATABLE_READ</attribute>
  |     -->
  | 
  |     <!-- Cache mode: one of LOCAL, REPL_ASYNC or REPL_SYNC. -->
  |     <attribute name="CacheMode">REPL_ASYNC</attribute>
  | 
  |     <!-- Cluster name. It has to be identical on all members that are
  |          supposed to find each other. -->
  |     <attribute name="ClusterName">TreeCache-Cluster</attribute>
  | 
  |     <!-- Cluster communication settings. -->
  |     <attribute name="ClusterConfig">
  |       <config>
  |         <!-- Notes on UDP:
  |              * on a multihomed machine, set bind_addr to the IP address
  |                of the appropriate NIC;
  |              * on Windows, set loopback to true, because the media sense
  |                feature is broken with multicast (even after media sense
  |                has been disabled). -->
  |         <UDP mcast_addr="230.0.0.7"
  |              mcast_port="45577"
  |              ip_ttl="64"
  |              ip_mcast="true"
  |              mcast_send_buf_size="150000"
  |              mcast_recv_buf_size="80000"
  |              ucast_send_buf_size="150000"
  |              ucast_recv_buf_size="80000"
  |              loopback="false"/>
  |         <PING timeout="2000"
  |               num_initial_members="3"
  |               up_thread="false"
  |               down_thread="false"/>
  | 
  |         <!-- Disabled: TCP / TCPPING settings.
  |         <TCP start_port="7851"/>
  |         <TCPPING initial_hosts="migvis1[7851]"
  |                  port_range="1"
  |                  timeout="3000"
  |                  num_initial_members="2"
  |                  up_thread="true"
  |                  down_thread="true"/>
  |         -->
  | 
  |         <MERGE2 min_interval="10000"
  |                 max_interval="20000"/>
  |         <FD shun="true"
  |             up_thread="true"
  |             down_thread="true"/>
  |         <VERIFY_SUSPECT timeout="1500"
  |                         up_thread="false"
  |                         down_thread="false"/>
  |         <pbcast.NAKACK gc_lag="50"
  |                        max_xmit_size="8192"
  |                        retransmit_timeout="600,1200,2400,4800"
  |                        up_thread="false"
  |                        down_thread="false"/>
  |         <UNICAST timeout="600,1200,2400"
  |                  window_size="100"
  |                  min_threshold="10"
  |                  down_thread="false"/>
  |         <pbcast.STABLE desired_avg_gossip="20000"
  |                        up_thread="false"
  |                        down_thread="false"/>
  |         <FRAG frag_size="8192"
  |               down_thread="false"
  |               up_thread="false"/>
  |         <pbcast.GMS join_timeout="5000"
  |                     join_retry_timeout="2000"
  |                     shun="true"
  |                     print_local_addr="true"/>
  |         <pbcast.STATE_TRANSFER up_thread="false"
  |                                down_thread="false"/>
  |       </config>
  |     </attribute>
  | 
  |     <!-- Replication queue switch; only relevant for asynchronous
  |          replication. -->
  |     <attribute name="UseReplQueue">false</attribute>
  | 
  |     <!-- Replication interval of the replication queue, in milliseconds. -->
  |     <attribute name="ReplQueueInterval">0</attribute>
  | 
  |     <!-- Maximum number of queued elements that triggers a replication. -->
  |     <attribute name="ReplQueueMaxElements">0</attribute>
  | 
  |     <!-- Whether the state is fetched when joining a cluster. -->
  |     <attribute name="FetchStateOnStartup">true</attribute>
  | 
  |     <!-- Maximum time, in milliseconds, to wait for the initial state
  |          (i.e. the contents of the cache) to be retrieved from the
  |          existing members of a clustered environment. -->
  |     <attribute name="InitialStateRetrievalTimeout">5000</attribute>
  | 
  |     <!-- Time, in milliseconds, to wait until all responses to a
  |          synchronous call have been received. -->
  |     <attribute name="SyncReplTimeout">10000</attribute>
  | 
  |     <!-- Maximum time, in milliseconds, to wait for a lock acquisition. -->
  |     <attribute name="LockAcquisitionTimeout">15000</attribute>
  | 
  |     <!-- Eviction policy class. -->
  |     <attribute name="EvictionPolicyClass">org.jboss.cache.eviction.LRUPolicy</attribute>
  | 
  |     <!-- Settings of the eviction policy (LRU), given per region. -->
  |     <attribute name="EvictionPolicyConfig">
  |       <config>
  |         <attribute name="wakeUpIntervalSeconds">5</attribute>
  | 
  |         <!-- Cache-wide default region. -->
  |         <region name="/_default_">
  |           <attribute name="maxNodes">5000</attribute>
  |           <attribute name="timeToLiveSeconds">1000</attribute>
  |           <!-- Maximum time an object is kept in the cache, regardless
  |                of idle time. -->
  |           <attribute name="maxAgeSeconds">120</attribute>
  |         </region>
  | 
  |         <region name="/migvis/cache.migvis.backend.longterm">
  |           <attribute name="maxNodes">200</attribute>
  |           <attribute name="timeToLiveSeconds">3600</attribute>
  |         </region>
  | 
  |         <region name="/migvis/cache.migvis.frontend.longterm">
  |           <attribute name="maxNodes">200</attribute>
  |           <attribute name="timeToLiveSeconds">3600</attribute>
  |         </region>
  |       </config>
  |     </attribute>
  |   </mbean>
  | 
  |   
  |   <!-- ==================================================================== -->
  |   <!-- Local TreeCache                                   -->
  |   <!-- ==================================================================== -->
  | 
  |   <mbean code="org.jboss.cache.TreeCache" name="jboss.cache:service=LocalTreeCache">
  |     <depends>jboss:service=Naming</depends>
  | 
  |     <!-- This cache instance runs in LOCAL mode. -->
  |     <attribute name="CacheMode">LOCAL</attribute>
  | 
  |     <!-- Eviction policy class. -->
  |     <attribute name="EvictionPolicyClass">org.jboss.cache.eviction.LRUPolicy</attribute>
  | 
  |     <!-- Settings of the eviction policy (LRU), given per region. -->
  |     <attribute name="EvictionPolicyConfig">
  |       <config>
  |         <attribute name="wakeUpIntervalSeconds">5</attribute>
  | 
  |         <!-- Cache-wide default region. -->
  |         <region name="/_default_">
  |           <attribute name="maxNodes">5000</attribute>
  |           <attribute name="timeToLiveSeconds">1000</attribute>
  |           <!-- Maximum time an object is kept in the cache, regardless
  |                of idle time. -->
  |           <attribute name="maxAgeSeconds">120</attribute>
  |         </region>
  | 
  |         <region name="/migvis/cache.migvis.frontend.shortterm">
  |           <attribute name="maxNodes">500</attribute>
  |           <attribute name="timeToLiveSeconds">5</attribute>
  |         </region>
  | 
  |         <region name="/migvis/cache.migvis.frontend.xhtml">
  |           <attribute name="maxNodes">5000</attribute>
  |           <attribute name="timeToLiveSeconds">20</attribute>
  |         </region>
  |       </config>
  |     </attribute>
  | 
  |     <!-- Cache loader: file-based loader whose location is a directory
  |          named local_tree_cache below the server data directory. -->
  |     <attribute name="CacheLoaderClass">org.jboss.cache.loader.FileCacheLoader</attribute>
  |     <attribute name="CacheLoaderConfig" replace="true">
  | 	  location=${jboss.server.data.dir}${/}local_tree_cache
  |     </attribute>
  |     <attribute name="CacheLoaderShared">false</attribute>
  |     <attribute name="CacheLoaderFetchTransientState">true</attribute>
  |     <attribute name="CacheLoaderFetchPersistentState">true</attribute>
  |   </mbean>
  |   
  | </server>
  | 
Any ideas about what causes this strange memory utilization, or about how best to start analyzing the problem, would be greatly appreciated.

Hans

View the original post : http://www.jboss.com/index.html?module=bb&op=viewtopic&p=4144924#4144924

Reply to the post : http://www.jboss.com/index.html?module=bb&op=posting&mode=reply&p=4144924



More information about the jboss-user mailing list