[infinispan-dev] failing queries on one node
Ales Justin
ales.justin at gmail.com
Tue Sep 4 03:09:16 EDT 2012
The tests work for me:
Running org.jboss.test.capedwarf.cluster.DatastoreTestCase
Tests run: 12, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 54.924 sec
And the Lucene* caches start fine; no stderr, etc.
But I do get warnings and sometimes even NPEs (on AS boot),
which I guess is a problem with the brand-new NBST (non-blocking state transfer):
-- node1
08:20:32,574 WARN [org.infinispan.topology.CacheTopologyControlCommand] (transport-thread-3) ISPN000071: Caught exception when handling command CacheTopologyControlCommand{cache=LuceneIndexesData, type=LEAVE, sender=node-b/capedwarf, joinInfo=null, topologyId=0, currentCH=null, pendingCH=null, throwable=null, viewId=-1}: org.infinispan.CacheException: The cache has been stopped and invocations are not allowed!
at org.infinispan.remoting.transport.jgroups.JGroupsTransport.getViewId(JGroupsTransport.java:229)
at org.infinispan.topology.ClusterTopologyManagerImpl.updateConsistentHash(ClusterTopologyManagerImpl.java:121)
at org.infinispan.topology.DefaultRebalancePolicy.updateConsistentHash(DefaultRebalancePolicy.java:131)
at org.infinispan.topology.DefaultRebalancePolicy.updateCacheMembers(DefaultRebalancePolicy.java:238)
at org.infinispan.topology.DefaultRebalancePolicy.removeLeavers(DefaultRebalancePolicy.java:203)
at org.infinispan.topology.ClusterTopologyManagerImpl.handleLeave(ClusterTopologyManagerImpl.java:169)
at org.infinispan.topology.CacheTopologyControlCommand.doPerform(CacheTopologyControlCommand.java:153)
at org.infinispan.topology.CacheTopologyControlCommand.perform(CacheTopologyControlCommand.java:136)
at org.infinispan.topology.LocalTopologyManagerImpl$1.call(LocalTopologyManagerImpl.java:253)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask.run(FutureTask.java:138) [classes.jar:1.6.0_33]
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) [classes.jar:1.6.0_33]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) [classes.jar:1.6.0_33]
at java.lang.Thread.run(Thread.java:680) [classes.jar:1.6.0_33]
-- node2
08:20:32,350 WARN [org.infinispan.topology.CacheTopologyControlCommand] (transport-thread-12) ISPN000071: Caught exception when handling command CacheTopologyControlCommand{cache=LuceneIndexesMetadata, type=LEAVE, sender=node-a/capedwarf, joinInfo=null, topologyId=0, currentCH=null, pendingCH=null, throwable=null, viewId=-1}: java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(AbstractQueuedSynchronizer.java:1024) [classes.jar:1.6.0_33]
at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(AbstractQueuedSynchronizer.java:1303) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask$Sync.innerGet(FutureTask.java:227) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask.get(FutureTask.java:91) [classes.jar:1.6.0_33]
at org.infinispan.topology.ClusterTopologyManagerImpl.executeOnClusterSync(ClusterTopologyManagerImpl.java:229)
at org.infinispan.topology.ClusterTopologyManagerImpl.updateConsistentHash(ClusterTopologyManagerImpl.java:124)
at org.infinispan.topology.DefaultRebalancePolicy.updateConsistentHash(DefaultRebalancePolicy.java:131)
at org.infinispan.topology.DefaultRebalancePolicy.updateCacheMembers(DefaultRebalancePolicy.java:238)
at org.infinispan.topology.DefaultRebalancePolicy.removeLeavers(DefaultRebalancePolicy.java:203)
at org.infinispan.topology.ClusterTopologyManagerImpl.handleLeave(ClusterTopologyManagerImpl.java:169)
at org.infinispan.topology.CacheTopologyControlCommand.doPerform(CacheTopologyControlCommand.java:153)
at org.infinispan.topology.CacheTopologyControlCommand.perform(CacheTopologyControlCommand.java:136)
at org.infinispan.topology.LocalTopologyManagerImpl$1.call(LocalTopologyManagerImpl.java:253)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask.run(FutureTask.java:138) [classes.jar:1.6.0_33]
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) [classes.jar:1.6.0_33]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) [classes.jar:1.6.0_33]
at java.lang.Thread.run(Thread.java:680) [classes.jar:1.6.0_33]
08:20:32,366 WARN [org.infinispan.topology.CacheTopologyControlCommand] (transport-thread-8) ISPN000071: Caught exception when handling command CacheTopologyControlCommand{cache=dist, type=LEAVE, sender=node-a/capedwarf, joinInfo=null, topologyId=0, currentCH=null, pendingCH=null, throwable=null, viewId=1}: java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(AbstractQueuedSynchronizer.java:1024) [classes.jar:1.6.0_33]
at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(AbstractQueuedSynchronizer.java:1303) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask$Sync.innerGet(FutureTask.java:227) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask.get(FutureTask.java:91) [classes.jar:1.6.0_33]
at org.infinispan.topology.ClusterTopologyManagerImpl.executeOnClusterSync(ClusterTopologyManagerImpl.java:229)
at org.infinispan.topology.ClusterTopologyManagerImpl.updateConsistentHash(ClusterTopologyManagerImpl.java:124)
at org.infinispan.topology.DefaultRebalancePolicy.updateConsistentHash(DefaultRebalancePolicy.java:131)
at org.infinispan.topology.DefaultRebalancePolicy.updateCacheMembers(DefaultRebalancePolicy.java:238)
at org.infinispan.topology.DefaultRebalancePolicy.removeLeavers(DefaultRebalancePolicy.java:203)
at org.infinispan.topology.ClusterTopologyManagerImpl.handleLeave(ClusterTopologyManagerImpl.java:169)
at org.infinispan.topology.CacheTopologyControlCommand.doPerform(CacheTopologyControlCommand.java:153)
at org.infinispan.topology.CacheTopologyControlCommand.perform(CacheTopologyControlCommand.java:136)
at org.infinispan.topology.LocalTopologyManagerImpl$1.call(LocalTopologyManagerImpl.java:253)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) [classes.jar:1.6.0_33]
at java.util.concurrent.FutureTask.run(FutureTask.java:138) [classes.jar:1.6.0_33]
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) [classes.jar:1.6.0_33]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) [classes.jar:1.6.0_33]
at java.lang.Thread.run(Thread.java:680) [classes.jar:1.6.0_33]
-Ales
On Sep 4, 2012, at 4:03 AM, Mircea Markus <mircea.markus at jboss.com> wrote:
> The root cause of this, as suggested by Dan and Sanne, is that another cache is being started during a cache's creation.
> I've made a fix candidate [1], which I assigned to Dan. I still get some failures on Ales's test, but with a different error message.
>
> [1] https://github.com/infinispan/infinispan/pull/1281
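>
> To illustrate the pattern (a hypothetical sketch, not the actual CapeDwarf code): a cache-manager listener that starts a second cache while the first one is still starting is enough to trigger this, e.g.:
>
> import org.infinispan.manager.EmbeddedCacheManager;
> import org.infinispan.notifications.Listener;
> import org.infinispan.notifications.cachemanagerlistener.annotation.CacheStarted;
> import org.infinispan.notifications.cachemanagerlistener.event.CacheStartedEvent;
>
> // registered via cacheManager.addListener(new NestedCacheStarter())
> @Listener
> public class NestedCacheStarter {
>     @CacheStarted
>     public void onCacheStarted(CacheStartedEvent event) {
>         // starting another cache from within a cache's start sequence
>         // is the re-entrant pattern the fix candidate targets
>         EmbeddedCacheManager cm = event.getCacheManager();
>         cm.getCache("second-cache");
>     }
> }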
> Cheers,
> Mircea
>
> On 3 Sep 2012, at 10:30, Ales Justin wrote:
>
>> Also to add ...
>>
>> More testing showed that it is the *slave* node's queries that fail.
>>
>> I set up static master node selection via jgroupsMaster / jgroupsSlave.
>> When I changed the node types, different tests failed.
>>
>> e.g. if node A was the slave, then queryOnA, indexGenAndQueryInsertOnA and testContentOnA failed;
>> if node B was the slave, simply swap A --> B in the test names above.
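>>
>> By static selection I mean the standard Hibernate Search 4.x worker backend properties (a sketch; the actual CapeDwarf wiring differs):
>>
>> # on the one node that owns the index writer:
>> hibernate.search.default.worker.backend = jgroupsMaster
>> # on every other node:
>> hibernate.search.default.worker.backend = jgroupsSlave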
>>
>> This is the full test:
>> * https://raw.github.com/capedwarf/capedwarf-blue/a3b33ffad60e69a4c67f6b0e4ebe299ffa37d350/cluster-tests/src/test/java/org/jboss/test/capedwarf/cluster/DatastoreTestCase.java
>>
>> As Sanne mentioned, that locking issue on the Lucene caches looks suspicious ...
>>
>> -Ales
>>
>> On Sep 3, 2012, at 11:20 AM, Sanne Grinovero <sanne at infinispan.org> wrote:
>>
>>> Team, I hope someone more familiar with state transfer and cache
>>> starting logic can help Ales, as I don't think this is a Query or
>>> Hibernate Search related problem.
>>> All the more so as his test was working not so long ago, and there were
>>> no Search releases nor major changes in Infinispan Query.
>>>
>>> This problem and the locking errors he reported in a different post
>>> are very likely related: if the background thread in Hibernate
>>> Search's backend fails to update the index because of the locking
>>> issue, it doesn't surprise me that the other nodes are unable to see
>>> an updated index (as write operations failed).
>>>
>>> Cheers,
>>> Sanne
>>>
>>> On 3 September 2012 01:00, Ales Justin <ales.justin at gmail.com> wrote:
>>>> After fixing ISPN-2253:
>>>> *
>>>> https://github.com/alesj/infinispan/commit/05229f426e829742902de0305488282b8283b8e5
>>>> (Sanne is working on an even cleaner solution)
>>>>
>>>> It now also looks like our CapeDwarf clustering tests have been (almost)
>>>> fixed; the remaining issue is that one node cannot query properly:
>>>>
>>>> Deployment dep1 goes to node1; the same deployment, as dep2, goes to node2.
>>>>
>>>> "Failed tests:
>>>> queryOnA(org.jboss.test.capedwarf.cluster.DatastoreTestCase): Number of
>>>> entities: 0"
>>>>
>>>> Whereas "queryOnB" doesn't fail.
>>>>
>>>> @Sanne -- do we have any query tests covering this kind of scenario?
>>>>
>>>> I'm using JGroups' auto-master selection.
>>>>
>>>> Tomorrow I'll try static JGroups master/slave selection,
>>>> and the new dynamic auto-master selection.
>>>>
>>>> Any ideas still welcome. ;-)
>>>>
>>>> -Ales
>>>>
>>>> ---
>>>>
>>>> @InSequence(30)
>>>> @Test
>>>> @OperateOnDeployment("dep1")
>>>> public void putStoresEntityOnDepA() throws Exception {
>>>>     Entity entity = createTestEntity("KIND", 1);
>>>>     getService().put(entity);
>>>>     assertStoreContains(entity);
>>>> }
>>>>
>>>> @InSequence(31)
>>>> @Test
>>>> @OperateOnDeployment("dep2")
>>>> public void putStoresEntityOnDepB() throws Exception {
>>>>     Entity entity = createTestEntity("KIND", 2);
>>>>     getService().put(entity);
>>>>     assertStoreContains(entity);
>>>> }
>>>>
>>>> @InSequence(40)
>>>> @Test
>>>> @OperateOnDeployment("dep1")
>>>> public void getEntityOnDepA() throws Exception {
>>>>     waitForSync();
>>>>
>>>>     Key key = KeyFactory.createKey("KIND", 1);
>>>>     Entity lookup = getService().get(key);
>>>>
>>>>     Assert.assertNotNull(lookup);
>>>>
>>>>     Entity entity = createTestEntity("KIND", 1);
>>>>     Assert.assertEquals(entity, lookup);
>>>> }
>>>>
>>>> @InSequence(50)
>>>> @Test
>>>> @OperateOnDeployment("dep2")
>>>> public void getEntityOnDepB() throws Exception {
>>>>     waitForSync();
>>>>
>>>>     Entity entity = createTestEntity("KIND", 1);
>>>>     assertStoreContains(entity);
>>>> }
>>>>
>>>> @InSequence(52)
>>>> @Test
>>>> @OperateOnDeployment("dep1")
>>>> public void queryOnA() throws Exception {
>>>>     waitForSync();
>>>>
>>>>     int count = getService().prepare(new Query("KIND")).countEntities(Builder.withDefaults());
>>>>     Assert.assertTrue("Number of entities: " + count, count == 2);
>>>> }
>>>>
>>>> @InSequence(53)
>>>> @Test
>>>> @OperateOnDeployment("dep2")
>>>> public void queryOnB() throws Exception {
>>>>     waitForSync();
>>>>
>>>>     int count = getService().prepare(new Query("KIND")).countEntities(Builder.withDefaults());
>>>>     Assert.assertTrue("Number of entities: " + count, count == 2);
>>>> }
>>>>
>>
>
> --
> Mircea Markus
> Infinispan lead (www.infinispan.org)
>
>
>