Hello,
We sometimes encounter issues with missing indexes and silos in a table.
The only solution we have is to restart the CrateDB pod.
We are using CrateDB version 4.8.1.
Isn’t there something better we can do?
Here’s an example log:
[2023-08-21T16:35:51,865][WARN ][o.e.i.c.IndicesClusterStateService] [app-cratedb-crate-0] [mtn.triphaso][1] marking and sending shard failed due to [failed recovery]
org.elasticsearch.indices.recovery.RecoveryFailedException: [mtn.triphaso][1]: Recovery failed on {app-cratedb-crate-0}{JNe3regLSQWIUagkV-tZmA}{c0iK4VC2QQWolruyENyDAQ}{10.6.0.99}{10.6.0.99:4300}{http_address=10.6.0.99:4200}
at org.elasticsearch.index.shard.IndexShard.lambda$executeRecovery$20(IndexShard.java:2563) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionListener$1.onFailure(ActionListener.java:74) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoveryListener$6(StoreRecovery.java:361) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionListener$1.onFailure(ActionListener.java:74) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:237) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.recoverFromStore(StoreRecovery.java:94) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.IndexShard.recoverFromStore(IndexShard.java:1756) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:73) [crate-server.jar:?]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [crate-server.jar:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?]
at java.lang.Thread.run(Thread.java:833) [?:?]
Caused by: org.elasticsearch.index.shard.IndexShardRecoveryException: failed to recover from gateway
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:448) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoverFromStore$0(StoreRecovery.java:96) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:235) ~[crate-server.jar:?]
... 7 more
Caused by: org.elasticsearch.index.engine.EngineCreationFailureException: failed to create engine
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:239) ~[crate-server.jar:?]
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:191) ~[crate-server.jar:?]
at org.elasticsearch.index.engine.InternalEngineFactory.newReadWriteEngine(InternalEngineFactory.java:25) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.IndexShard.innerOpenEngineAndTranslog(IndexShard.java:1518) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.IndexShard.openEngineAndRecoverFromTranslog(IndexShard.java:1483) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:443) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoverFromStore$0(StoreRecovery.java:96) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:235) ~[crate-server.jar:?]
... 7 more
Caused by: java.nio.file.NoSuchFileException: /data/data/nodes/0/indices/K2iC3C7xQcG30M2Tmk8LvA/1/index/pending_segments_133
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:92) ~[?:?]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106) ~[?:?]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111) ~[?:?]
at sun.nio.fs.UnixFileSystemProvider.implDelete(UnixFileSystemProvider.java:248) ~[?:?]
at sun.nio.fs.AbstractFileSystemProvider.delete(AbstractFileSystemProvider.java:105) ~[?:?]
at java.nio.file.Files.delete(Files.java:1152) ~[?:?]
at org.apache.lucene.store.FSDirectory.privateDeleteFile(FSDirectory.java:370) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.apache.lucene.store.FSDirectory.deleteFile(FSDirectory.java:339) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.apache.lucene.store.FilterDirectory.deleteFile(FilterDirectory.java:63) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.elasticsearch.index.store.ByteSizeCachingDirectory.deleteFile(ByteSizeCachingDirectory.java:175) ~[crate-server.jar:?]
at org.apache.lucene.store.FilterDirectory.deleteFile(FilterDirectory.java:63) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.elasticsearch.index.store.Store$StoreDirectory.deleteFile(Store.java:712) ~[crate-server.jar:?]
at org.elasticsearch.index.store.Store$StoreDirectory.deleteFile(Store.java:717) ~[crate-server.jar:?]
at org.apache.lucene.store.LockValidatingDirectoryWrapper.deleteFile(LockValidatingDirectoryWrapper.java:38) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.apache.lucene.index.IndexFileDeleter.deleteFile(IndexFileDeleter.java:705) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.apache.lucene.index.IndexFileDeleter.deleteFiles(IndexFileDeleter.java:699) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.apache.lucene.index.IndexFileDeleter.<init>(IndexFileDeleter.java:238) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1089) ~[lucene-core-8.11.0.jar:8.11.0 e912fdd5b632267a9088507a2a6bcbc75108f381 - jpountz - 2021-11-09 14:03:35]
at org.elasticsearch.index.engine.InternalEngine.createWriter(InternalEngine.java:2227) ~[crate-server.jar:?]
at org.elasticsearch.index.engine.InternalEngine.createWriter(InternalEngine.java:2215) ~[crate-server.jar:?]
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:232) ~[crate-server.jar:?]
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:191) ~[crate-server.jar:?]
at org.elasticsearch.index.engine.InternalEngineFactory.newReadWriteEngine(InternalEngineFactory.java:25) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.IndexShard.innerOpenEngineAndTranslog(IndexShard.java:1518) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.IndexShard.openEngineAndRecoverFromTranslog(IndexShard.java:1483) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:443) ~[crate-server.jar:?]
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoverFromStore$0(StoreRecovery.java:96) ~[crate-server.jar:?]
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:235) ~[crate-server.jar:?]
... 7 more
[2023-08-21T16:35:51,866][WARN ][o.e.c.r.a.AllocationService] [app-cratedb-crate-0] failing shard [failed shard, shard [mtn.triphaso][1], node[JNe3regLSQWIUagkV-tZmA], [P], recovery_source[existing store recovery; bootstrap_history_uuid=false], s[INITIALIZING], a[id=yxBxEjRfTs62W2cJ-UeQhw], unassigned_info[[reason=ALLOCATION_FAILED], at[2023-08-21T16:35:46.932Z], failed_attempts[4], failed_nodes[[JNe3regLSQWIUagkV-tZmA]], delayed=false, details[failed shard on node [JNe3regLSQWIUagkV-tZmA]: failed recovery, failure org.elasticsearch.indices.recovery.RecoveryFailedException: [mtn.triphaso][1]: Recovery failed on {app-cratedb-crate-0}{JNe3regLSQWIUagkV-tZmA}{c0iK4VC2QQWolruyENyDAQ}{10.6.0.99}{10.6.0.99:4300}{http_address=10.6.0.99:4200}
at org.elasticsearch.index.shard.IndexShard.lambda$executeRecovery$20(IndexShard.java:2563)
at org.elasticsearch.action.ActionListener$1.onFailure(ActionListener.java:74)
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoveryListener$6(StoreRecovery.java:361)
at org.elasticsearch.action.ActionListener$1.onFailure(ActionListener.java:74)
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:237)
at org.elasticsearch.index.shard.StoreRecovery.recoverFromStore(StoreRecovery.java:94)
at org.elasticsearch.index.shard.IndexShard.recoverFromStore(IndexShard.java:1756)
at org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:73)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:833)
Caused by: [mtn.triphaso/K2iC3C7xQcG30M2Tmk8LvA][[mtn.triphaso][1]] org.elasticsearch.index.shard.IndexShardRecoveryException: failed to recover from gateway
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:448)
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoverFromStore$0(StoreRecovery.java:96)
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:235)
... 7 more
Caused by: [mtn.triphaso/K2iC3C7xQcG30M2Tmk8LvA][[mtn.triphaso][1]] org.elasticsearch.index.engine.EngineCreationFailureException: failed to create engine
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:239)
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:191)
at org.elasticsearch.index.engine.InternalEngineFactory.newReadWriteEngine(InternalEngineFactory.java:25)
at org.elasticsearch.index.shard.IndexShard.innerOpenEngineAndTranslog(IndexShard.java:1518)
at org.elasticsearch.index.shard.IndexShard.openEngineAndRecoverFromTranslog(IndexShard.java:1483)
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:443)
... 9 more
Caused by: java.nio.file.NoSuchFileException: /data/data/nodes/0/indices/K2iC3C7xQcG30M2Tmk8LvA/1/index/pending_segments_133
at java.base/sun.nio.fs.UnixException.translateToIOException(UnixException.java:92)
at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106)
at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at java.base/sun.nio.fs.UnixFileSystemProvider.implDelete(UnixFileSystemProvider.java:248)
at java.base/sun.nio.fs.AbstractFileSystemProvider.delete(AbstractFileSystemProvider.java:105)
at java.base/java.nio.file.Files.delete(Files.java:1152)
at org.apache.lucene.store.FSDirectory.privateDeleteFile(FSDirectory.java:370)
at org.apache.lucene.store.FSDirectory.deleteFile(FSDirectory.java:339)
at org.apache.lucene.store.FilterDirectory.deleteFile(FilterDirectory.java:63)
at org.elasticsearch.index.store.ByteSizeCachingDirectory.deleteFile(ByteSizeCachingDirectory.java:175)
at org.apache.lucene.store.FilterDirectory.deleteFile(FilterDirectory.java:63)
at org.elasticsearch.index.store.Store$StoreDirectory.deleteFile(Store.java:712)
at org.elasticsearch.index.store.Store$StoreDirectory.deleteFile(Store.java:717)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.deleteFile(LockValidatingDirectoryWrapper.java:38)
at org.apache.lucene.index.IndexFileDeleter.deleteFile(IndexFileDeleter.java:705)
at org.apache.lucene.index.IndexFileDeleter.deleteFiles(IndexFileDeleter.java:699)
at org.apache.lucene.index.IndexFileDeleter.<init>(IndexFileDeleter.java:238)
at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1089)
at org.elasticsearch.index.engine.InternalEngine.createWriter(InternalEngine.java:2227)
at org.elasticsearch.index.engine.InternalEngine.createWriter(InternalEngine.java:2215)
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:232)
... 14 more
], allocation_status[no_valid_shard_copy]], message [failed recovery], markAsStale [true], failure [org.elasticsearch.indices.recovery.RecoveryFailedException: [mtn.triphaso][1]: Recovery failed on {app-cratedb-crate-0}{JNe3regLSQWIUagkV-tZmA}{c0iK4VC2QQWolruyENyDAQ}{10.6.0.99}{10.6.0.99:4300}{http_address=10.6.0.99:4200}
at org.elasticsearch.index.shard.IndexShard.lambda$executeRecovery$20(IndexShard.java:2563)
at org.elasticsearch.action.ActionListener$1.onFailure(ActionListener.java:74)
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoveryListener$6(StoreRecovery.java:361)
at org.elasticsearch.action.ActionListener$1.onFailure(ActionListener.java:74)
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:237)
at org.elasticsearch.index.shard.StoreRecovery.recoverFromStore(StoreRecovery.java:94)
at org.elasticsearch.index.shard.IndexShard.recoverFromStore(IndexShard.java:1756)
at org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:73)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:833)
Caused by: [mtn.triphaso/K2iC3C7xQcG30M2Tmk8LvA][[mtn.triphaso][1]] org.elasticsearch.index.shard.IndexShardRecoveryException: failed to recover from gateway
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:448)
at org.elasticsearch.index.shard.StoreRecovery.lambda$recoverFromStore$0(StoreRecovery.java:96)
at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:235)
... 7 more
Caused by: [mtn.triphaso/K2iC3C7xQcG30M2Tmk8LvA][[mtn.triphaso][1]] org.elasticsearch.index.engine.EngineCreationFailureException: failed to create engine
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:239)
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:191)
at org.elasticsearch.index.engine.InternalEngineFactory.newReadWriteEngine(InternalEngineFactory.java:25)
at org.elasticsearch.index.shard.IndexShard.innerOpenEngineAndTranslog(IndexShard.java:1518)
at org.elasticsearch.index.shard.IndexShard.openEngineAndRecoverFromTranslog(IndexShard.java:1483)
at org.elasticsearch.index.shard.StoreRecovery.internalRecoverFromStore(StoreRecovery.java:443)
... 9 more
Caused by: java.nio.file.NoSuchFileException: /data/data/nodes/0/indices/K2iC3C7xQcG30M2Tmk8LvA/1/index/pending_segments_133
at java.base/sun.nio.fs.UnixException.translateToIOException(UnixException.java:92)
at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106)
at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at java.base/sun.nio.fs.UnixFileSystemProvider.implDelete(UnixFileSystemProvider.java:248)
at java.base/sun.nio.fs.AbstractFileSystemProvider.delete(AbstractFileSystemProvider.java:105)
at java.base/java.nio.file.Files.delete(Files.java:1152)
at org.apache.lucene.store.FSDirectory.privateDeleteFile(FSDirectory.java:370)
at org.apache.lucene.store.FSDirectory.deleteFile(FSDirectory.java:339)
at org.apache.lucene.store.FilterDirectory.deleteFile(FilterDirectory.java:63)
at org.elasticsearch.index.store.ByteSizeCachingDirectory.deleteFile(ByteSizeCachingDirectory.java:175)
at org.apache.lucene.store.FilterDirectory.deleteFile(FilterDirectory.java:63)
at org.elasticsearch.index.store.Store$StoreDirectory.deleteFile(Store.java:712)
at org.elasticsearch.index.store.Store$StoreDirectory.deleteFile(Store.java:717)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.deleteFile(LockValidatingDirectoryWrapper.java:38)
at org.apache.lucene.index.IndexFileDeleter.deleteFile(IndexFileDeleter.java:705)
at org.apache.lucene.index.IndexFileDeleter.deleteFiles(IndexFileDeleter.java:699)
at org.apache.lucene.index.IndexFileDeleter.<init>(IndexFileDeleter.java:238)
at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1089)
at org.elasticsearch.index.engine.InternalEngine.createWriter(InternalEngine.java:2227)
at org.elasticsearch.index.engine.InternalEngine.createWriter(InternalEngine.java:2215)
at org.elasticsearch.index.engine.InternalEngine.<init>(InternalEngine.java:232)
... 14 more
]]
Thank you