<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
    <head> 
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> 
        <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0"> 
        <base href="https://hibernate.atlassian.net"> 
        <title>Message Title</title> 
    </head> 
    <body class="jira" style="color: #333333; font-family: Arial, sans-serif; font-size: 14px; line-height: 1.429"> 
        <table id="background-table" cellpadding="0" cellspacing="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0; background-color: #f5f5f5; border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0" bgcolor="#f5f5f5"> 
            <!-- header here --> 
            <tbody>
                <tr> 
                    <td id="header-pattern-container" style="padding: 0; border-collapse: collapse; padding: 10px 20px"> 
                        <table id="header-pattern" cellspacing="0" cellpadding="0" border="0" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0"> 
                            <tbody>
                                <tr> 
                                    <td id="header-avatar-image-container" valign="top" style="padding: 0; border-collapse: collapse; vertical-align: top; width: 32px; padding-right: 8px" width="32"> <img id="header-avatar-image" class="image_fix" src="https://secure.gravatar.com/avatar/3673815784047b7e0673677a0bc7dde0?d=mm&amp;s=48" height="32" width="32" border="0" style="border-radius: 3px; vertical-align: top"> </td> 
                                    <td id="header-text-container" valign="middle" style="padding: 0; border-collapse: collapse; vertical-align: middle; font-family: Arial, sans-serif; font-size: 14px; line-height: 20px; mso-line-height-rule: exactly; mso-text-raise: 1px"> <a class="user-hover" rel="yrodiere" id="email_yrodiere" href="https://hibernate.atlassian.net/secure/ViewProfile.jspa?name=yrodiere" style="color:#6c797f;; color: #3b73af; text-decoration: none">Yoann Rodière</a> <strong>commented</strong> on <a href="https://hibernate.atlassian.net/browse/HSEARCH-2616" style="color: #3b73af; text-decoration: none"><img src="cid:jira-generated-image-avatar-dfb11971-cc9a-4cc2-89d1-f15b96f08bce" height="16" width="16" border="0" align="absmiddle" alt="Sub-task"> HSEARCH-2616</a> </td> 
                                </tr> 
                            </tbody>
                        </table> </td> 
                </tr> 
                <tr> 
                    <td id="email-content-container" style="padding: 0; border-collapse: collapse; padding: 0 20px"> 
                        <table id="email-content-table" cellspacing="0" cellpadding="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0; border-spacing: 0; border-collapse: separate"> 
                            <tbody>
                                <tr> 
                                    <!-- there needs to be content in the cell for it to render in some clients --> 
                                    <td class="email-content-rounded-top mobile-expand" style="padding: 0; border-collapse: collapse; color: #ffffff; padding: 0 15px 0 16px; height: 15px; background-color: #ffffff; border-left: 1px solid #cccccc; border-top: 1px solid #cccccc; border-right: 1px solid #cccccc; border-bottom: 0; border-top-right-radius: 5px; border-top-left-radius: 5px; height: 10px; line-height: 10px; padding: 0 15px 0 16px; mso-line-height-rule: exactly" height="10" bgcolor="#ffffff">&nbsp;</td> 
                                </tr> 
                                <tr> 
                                    <td class="email-content-main mobile-expand " style="padding: 0; border-collapse: collapse; border-left: 1px solid #cccccc; border-right: 1px solid #cccccc; border-top: 0; border-bottom: 0; padding: 0 15px 0 16px; background-color: #ffffff" bgcolor="#ffffff"> 
                                        <table class="page-title-pattern" cellspacing="0" cellpadding="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0"> 
                                            <tbody>
                                                <tr> 
                                                    <td style="vertical-align: top;; padding: 0; border-collapse: collapse; padding-right: 5px; font-size: 20px; line-height: 30px; mso-line-height-rule: exactly" class="page-title-pattern-header-container"> <span class="page-title-pattern-header" style="font-family: Arial, sans-serif; padding: 0; font-size: 20px; line-height: 30px; mso-text-raise: 2px; mso-line-height-rule: exactly; vertical-align: middle"> <a href="https://hibernate.atlassian.net/browse/HSEARCH-2616" style="color: #3b73af; text-decoration: none">Re: JSR-352: Implement checkpoints so as to recover correctly from failures</a> </span> </td> 
                                                </tr> 
                                            </tbody>
                                        </table> </td> 
                                </tr> 
                                <tr> 
                                    <td id="text-paragraph-pattern-top" class="email-content-main mobile-expand  comment-top-pattern" style="padding: 0; border-collapse: collapse; border-left: 1px solid #cccccc; border-right: 1px solid #cccccc; border-top: 0; border-bottom: 0; padding: 0 15px 0 16px; background-color: #ffffff; border-bottom: none; padding-bottom: 0" bgcolor="#ffffff"> 
                                        <table class="text-paragraph-pattern" cellspacing="0" cellpadding="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0; font-family: Arial, sans-serif; font-size: 14px; line-height: 20px; mso-line-height-rule: exactly; mso-text-raise: 2px"> 
                                            <tbody>
                                                <tr> 
                                                    <td class="text-paragraph-pattern-container mobile-resize-text " style="padding: 0; border-collapse: collapse; padding: 0 0 10px"> <p style="margin: 10px 0 0; margin-top: 0">After a bit of discussion, it's clear that the solution I proposed won't always work. We cannot rely on the persisted batch state to determine what has been written into the index, simply because we can have a failure between the moment we write into the index and the moment we persist the batch state.</p> <p style="margin: 10px 0 0">So, <a href="https://hibernate.atlassian.net/secure/ViewProfile.jspa?name=sanne" class="user-hover" rel="sanne" style="color: #3b73af; text-decoration: none">Sanne Grinovero</a>'s solution of having a hidden field identifying the "work unit" that added a given document to the index seems to be the only workable one. Here is the todo list:</p> 
                                                        <ol> 
                                                            <li>We have to decide how we build the "work unit identifier".</li> 
                                                        </ol> 
                                                        <ul> 
                                                            <li>We have multiple checkpoints within a partition, so using the partition ID as discriminator is wrong, it should be something like a "checkpoint ID". I don't know if such thing exist, but worst case I guess we could combine the partition id with some checkpoint ID (by storing the number of successful checkpoints in the partition's persistent data).</li> 
                                                            <li>We must avoid at all cost our delete query to delete out-of-scope documents. This means the discriminator must also include something that will identify our job instance uniquely, while still being the same after restarting the job. Fortunately, such an ID is available through JobContext.getInstanceId(), which is guaranteed by the spec to be "a globally unique value within the job repository".</li> 
                                                        </ul> 
                                                        <ol> 
                                                            <li>We have to implement this hidden "work unit identifier" field: offer ways to define its value (when building an AddLuceneWork/UpdateLuceneWork? or when building a document?), and handle its addition to indexed documents (in Lucene <b>and</b> ES). <b>Note:</b> I'd rather do this in a separate ticket, and maybe even merge it to master instead of the jsr352 branch, because it's an important change and shouldn't be drowned in the many commits related to JSR-352 integration.</li> 
                                                            <li>We should implement these delete queries, which should occur before we write anything, upon each checkpoint.</li> 
                                                            <li>Maybe we should provide a way for users to not use this hidden field, in which case we'd either not support restarting a job (a dubious choice in my opinion, since recovery is the main strength of JSR-352) or always use UpdateLuceneWork (which would probably be very slow).</li> 
                                                            <li>I don't know anything about the performance of delete queries. I guess the number of items between two checkpoints must be large in order to make delete queries efficient; let's hope it doesn't need to so large that checkpoints won't make sense anymore (e.g. 10 000 is already a lot, IMHO). This obviously will require some testing.</li> 
                                                        </ol> </td> 
                                                </tr> 
                                            </tbody>
                                        </table> </td> 
                                </tr> 
                                <tr> 
                                    <td class="email-content-main mobile-expand " style="padding: 0; border-collapse: collapse; border-left: 1px solid #cccccc; border-right: 1px solid #cccccc; border-top: 0; border-bottom: 0; padding: 0 15px 0 16px; background-color: #ffffff" bgcolor="#ffffff"> <script type="application/ld+json">
{
  "@context": "http://schema.org",
  "@type": "EmailMessage",
  "description": "View Issue",
  "potentialAction": {
    "@type": "ViewAction",
        "target": "https://hibernate.atlassian.net/browse/HSEARCH-2616?inbox=true&focusedCommentId=93416&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-93416",
    "name": "View Comment"
      },
  "publisher": {
    "@type": "Organization",
    "name": "Atlassian",
    "url": "https://www.atlassian.com"
  }
}
</script> 
                                        <table id="actions-pattern" cellspacing="0" cellpadding="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0; font-family: Arial, sans-serif; font-size: 14px; line-height: 20px; mso-line-height-rule: exactly; mso-text-raise: 1px"> 
                                            <tbody>
                                                <tr> 
                                                    <td id="actions-pattern-container" valign="middle" style="padding: 0; border-collapse: collapse; padding: 10px 0 10px 24px; vertical-align: middle; padding-left: 0"> 
                                                        <table align="left" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0"> 
                                                            <tbody>
                                                                <tr> 
                                                                    <td class="actions-pattern-action-icon-container" style="padding: 0; border-collapse: collapse; font-family: Arial, sans-serif; font-size: 14px; line-height: 20px; mso-line-height-rule: exactly; mso-text-raise: 0; vertical-align: middle"> <a href="https://hibernate.atlassian.net/browse/HSEARCH-2616#add-comment" target="_blank" title="Add Comment" style="color: #3b73af; text-decoration: none"> <img class="actions-pattern-action-icon-image" src="cid:jira-generated-image-static-comment-icon-5c89737f-e4ed-41a9-90b4-5dbff72d8704" alt="Add Comment" title="Add Comment" height="16" width="16" border="0" style="vertical-align: middle"> </a> </td> 
                                                                    <td class="actions-pattern-action-text-container" style="padding: 0; border-collapse: collapse; font-family: Arial, sans-serif; font-size: 14px; line-height: 20px; mso-line-height-rule: exactly; mso-text-raise: 4px; padding-left: 5px"> <a href="https://hibernate.atlassian.net/browse/HSEARCH-2616#add-comment" target="_blank" title="Add Comment" style="color: #3b73af; text-decoration: none">Add Comment</a> </td> 
                                                                </tr> 
                                                            </tbody>
                                                        </table> </td> 
                                                </tr> 
                                            </tbody>
                                        </table> </td> 
                                </tr> 
                                <!-- there needs to be content in the cell for it to render in some clients --> 
                                <tr> 
                                    <td class="email-content-rounded-bottom mobile-expand" style="padding: 0; border-collapse: collapse; color: #ffffff; padding: 0 15px 0 16px; height: 5px; line-height: 5px; background-color: #ffffff; border-top: 0; border-left: 1px solid #cccccc; border-bottom: 1px solid #cccccc; border-right: 1px solid #cccccc; border-bottom-right-radius: 5px; border-bottom-left-radius: 5px; mso-line-height-rule: exactly" height="5" bgcolor="#ffffff">&nbsp;</td> 
                                </tr> 
                            </tbody>
                        </table> </td> 
                </tr> 
                <tr> 
                    <td id="footer-pattern" style="padding: 0; border-collapse: collapse; padding: 12px 20px"> 
                        <table id="footer-pattern-container" cellspacing="0" cellpadding="0" border="0" style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0"> 
                            <tbody>
                                <tr> 
                                    <td id="footer-pattern-text" class="mobile-resize-text" width="100%" style="padding: 0; border-collapse: collapse; color: #999999; font-size: 12px; line-height: 18px; font-family: Arial, sans-serif; mso-line-height-rule: exactly; mso-text-raise: 2px"> This message was sent by Atlassian JIRA <span id="footer-build-information">(v1000.910.0#100040-<span title="0b083fcc3918e3e8109111c68d0bab4f2e5ea72a" data-commit-id="0b083fcc3918e3e8109111c68d0bab4f2e5ea72a}">sha1:0b083fc</span>)</span> </td> 
                                    <td id="footer-pattern-logo-desktop-container" valign="top" style="padding: 0; border-collapse: collapse; padding-left: 20px; vertical-align: top"> 
                                        <table style="border-collapse: collapse; mso-table-lspace: 0; mso-table-rspace: 0"> 
                                            <tbody>
                                                <tr> 
                                                    <td id="footer-pattern-logo-desktop-padding" style="padding: 0; border-collapse: collapse; padding-top: 3px"> <img id="footer-pattern-logo-desktop" src="cid:jira-generated-image-static-footer-desktop-logo-3618d952-cab9-468b-9ab7-0a5412ee3d29" alt="Atlassian logo" title="Atlassian logo" width="169" height="36" class="image_fix"> </td> 
                                                </tr> 
                                            </tbody>
                                        </table> </td> 
                                </tr> 
                            </tbody>
                        </table> </td> 
                </tr> 
            </tbody>
        </table>   
    </body>
</html>