Tuesday, June 28, 2011

Ensure valid Sitecore Internal Links in Page Editor

(Updated on Dec 9, 2011)
Refer to this post for updated solution.

(Updated on July 27, 2011)

A workaround developed for one of our customers prompted me to blog about it, as I can see how many others may be affected by this issue.

The issue was that whenever an editor opens RTE control in Page Editor, to get access to all provided functions, and saves the changes, all internal links (that start with “~/link.aspx”) get prefixed with the host name that is used in the browser to access the Page Editor. I could recreate this behavior only in IE browser and only when it runs in compatibility mode. Got same results in both IE8 and IE9 running in compatibility mode. Normal mode of IE8 does not cause the issue.

To address this issue I hooked my own processor into the <saveUI> pipeline. It corrects internal links based on a regex that matches the unwanted part of the link. Here is the code I came up with:

Code Snippet
  using System;
  using System.Text.RegularExpressions;
  using Sitecore.Configuration;
  using Sitecore.Data.Fields;
  using Sitecore.Data.Items;
  using Sitecore.Pipelines.Save;

  namespace Sitecore.Support.Pipelines.Save
  {
     /// <summary>
     /// saveUI pipeline processor that strips an absolute "scheme://host/..." prefix from
     /// internal ("~/link.aspx") and media ("~/media") links in rich text field values
     /// before they are saved.
     /// </summary>
     public class EnsureRichTextRelativeLinks
     {
        // Default patterns. {0} is replaced with the regex-escaped host name.
        // The part between the host and the link marker is matched lazily and may not
        // contain quotes, angle brackets or whitespace, so a single match can never
        // span from one link to another and swallow the markup in between.
        // No other curly braces may appear in a pattern because it is passed to string.Format.
        private const string DefaultInternalLinkPattern = @"https?:(//|\\\\?){0}[^""'<>\s]*?~/link\.aspx";
        private const string DefaultMediaLinkPattern = @"https?:(//|\\\\?){0}[^""'<>\s]*?~/media";

        /// <summary>
        /// Rewrites the value of every non-empty rich text field being saved.
        /// Does nothing when the save has no Sheer UI; aborts the pipeline when the
        /// current result is "no" or "undefined".
        /// </summary>
        /// <param name="args">Save pipeline arguments.</param>
        public void Process(SaveArgs args)
        {
           if (!args.HasSheerUI)
           {
              return;
           }

           if (args.Result == "no" || args.Result == "undefined")
           {
              args.AbortPipeline();
              return;
           }

           foreach (SaveArgs.SaveItem item in args.Items)
           {
              Item contentItem = Context.ContentDatabase.Items[item.ID, item.Language, item.Version];
              if (contentItem == null)
              {
                 continue;
              }

              foreach (SaveArgs.SaveField field in item.Fields)
              {
                 if (string.IsNullOrEmpty(field.Value))
                 {
                    continue;
                 }

                 // The field type is only available on the stored item, not on the save data.
                 Field fld = contentItem.Fields[field.ID];
                 if (fld != null && string.Equals(fld.Type, "rich text", StringComparison.OrdinalIgnoreCase))
                 {
                    field.Value = EnsureRelativeLinks(field.Value);
                 }
              }
           }
        }

        /// <summary>
        /// Replaces absolute internal and media link prefixes in <paramref name="fieldValue"/>
        /// with the configured relative values.
        /// </summary>
        /// <remarks>
        /// Patterns and replacement values are read from the PageEditor.* settings; a pattern
        /// defined in configuration overrides the default used here.
        /// </remarks>
        /// <param name="fieldValue">Raw rich text field value.</param>
        /// <returns>The field value with matching link prefixes replaced.</returns>
        protected virtual string EnsureRelativeLinks(string fieldValue)
        {
           string internalLinkPattern = Settings.GetSetting("PageEditor.InternalLinkReplacePattern",
                                                            DefaultInternalLinkPattern);
           string internalLinkReplacementValue = Settings.GetSetting("PageEditor.InternalLinkReplacementValue",
                                                                     "~/link.aspx");
           string mediaLinkPattern = Settings.GetSetting("PageEditor.MediaLinkReplacePattern",
                                                         DefaultMediaLinkPattern);
           string mediaLinkReplacementValue = Settings.GetSetting("PageEditor.MediaLinkReplacementValue", "~/media");

           // Escape the host so that "." (and any other metacharacter) is matched literally.
           string hostName = Regex.Escape(Sitecore.Web.WebUtil.GetHostName());

           Regex linkPattern = new Regex(string.Format(internalLinkPattern, hostName), RegexOptions.IgnoreCase);
           Regex mediaPattern = new Regex(string.Format(mediaLinkPattern, hostName), RegexOptions.IgnoreCase);

           string value = linkPattern.Replace(fieldValue, internalLinkReplacementValue);
           return mediaPattern.Replace(value, mediaLinkReplacementValue);
        }
     }
  }

I put the regex patterns as well as the replacement strings into an include config file to make adjustments easier if necessary. Here is what the config file looks like:

Code Snippet
  <configuration xmlns:patch="http://www.sitecore.net/xmlconfig/">
    <sitecore>
      <processors>
        <saveUI>
          <!--
          Fix to address an issue of internal links being converted to absolute links
          after saving content of RTE field in IE browser running in compatibility mode.
          The fix is developed by Sitecore support and should be removed after the problem is fixed in the core product.
          -->
          <processor mode="on" type="Sitecore.Support.Pipelines.Save.EnsureRichTextRelativeLinks, Sitecore.Support.341310" patch:after="processor[@type='Sitecore.Pipelines.Save.ConvertLayoutField, Sitecore.Kernel']" />
        </saveUI>
      </processors>
      <settings>
        <!-- RegEx pattern to ensure valid internal links during the save event in Page Editor.
             The issue occurs in IE browser running in compatibility mode.
             Every match is replaced with the value defined at PageEditor.InternalLinkReplacementValue.
             The {0} parameter is used to insert a host name used in the browser to access Page Editor.
             The part between the host name and the link marker is matched lazily and may not contain
             quotes, angle brackets or whitespace, so one match cannot span several links.
             Do not use any other curly braces in the pattern: it is passed through string.Format.
        -->
        <setting name="PageEditor.InternalLinkReplacePattern" value="https?:(//|\\\\?){0}[^&quot;'&lt;&gt;\s]*?~/link\.aspx" />
        <!-- Replacement string for regex pattern defined in PageEditor.InternalLinkReplacePattern setting.
        -->
        <setting name="PageEditor.InternalLinkReplacementValue" value="~/link.aspx" />
        <!-- RegEx pattern to ensure valid media links during the save event in Page Editor.
             The issue occurs in IE browser running in compatibility mode.
             Every match is replaced with the value defined at PageEditor.MediaLinkReplacementValue.
             The {0} parameter is used to insert a host name used in the browser to access Page Editor.
             The same restrictions as for PageEditor.InternalLinkReplacePattern apply.
        -->
        <setting name="PageEditor.MediaLinkReplacePattern" value="https?:(//|\\\\?){0}[^&quot;'&lt;&gt;\s]*?~/media" />
        <!-- Replacement string for regex pattern defined in PageEditor.MediaLinkReplacePattern setting.
        -->
        <setting name="PageEditor.MediaLinkReplacementValue" value="~/media" />
      </settings>
    </sitecore>
  </configuration>

Oh yeah, I wasn’t sure if this could happen to media links (couldn’t reproduce it locally) but decided to add the same replacement functionality for “~/media” links as well. If you find it useless, feel free to remove that part :).

This code was developed and tested in Sitecore 6.4.1 rev.110324. It’s expected to work in any 6.4 version. Can’t see any problems with 6.5 but I haven’t tested it there.

As all the code fit into the snippet boxes above, I don’t provide links to sources for download. Though here is the link to Sitecore package that installs the fix.

Hope it saves you some time!

Friday, January 21, 2011

Upcoming Sitecore event: Dreamcore 2011

The spring is coming and along with it our second Dreamcore event is coming too. If you want to know more about Sitecore’s functionality and the future of our product, book your seat in the Dreamcore 2011 bullet train.

Tuesday, October 26, 2010

All-in-1 Workflow

 

In this article I’m going to describe an approach that we’ve taken creating a solution for a universal workflow requirement. The idea was to create one workflow for all content items as they all would follow the same workflow process.

Here is a use case that came from one of our wonderful customers:

  • All content items should have the same workflow process.
  • Different security roles may have or not have access to a workflow state at different parts of a content tree.
  • Workbox should respect aforementioned security configuration.

To understand what access rights provide editing permission to the user let’s take a look at the way Sitecore resolves access level to a content item.

  • Check if a user has Read (item:read) access to an item. If so, the user will be able to see the item in a Content Editor.image
  • Check if the user has Write (item:write) access to the item. If the item is in workflow, check whether the user has Write access to a workflow state the item is in (workflowState:write). If either of those access rights is not granted, deny modification access.
    image

As you can see both Write and Workflow State Write access rights are required for a user to edit the item.

This is the approach we came up with to address all the requirements. We can meet all requirements by extending standard Workflow class and using it to run workflow process.

First. Define additional access right that along with Workflow state Write one would determine access to items in workflow at different parts of the content tree. In this approach we decided to use “workflowState:write” access right which is available only to workflow related items by default. In this case the only thing we need to do is to make it available for all of the items. And it could be easily configured in web.config file:

Code Snippet
  1. <rules>
  2.   <add prefix="workflowState:write" typeName="Sitecore.Data.Items.Item"/>
  3. </rules>

Now you can set this right for any item in Security Editor. Just don’t forget to add an appropriate column to see it there.

image

Second. Extend default Sitecore.Workflows.Simple.Workflow class to take into account new security configuration.

Code Snippet
  namespace TwinPeaks.Workflows
  {
      /// <summary>
      /// Workflow implementation that, for states flagged with the "Check required" field,
      /// additionally requires the workflowState:write access right on the content item itself
      /// when resolving commands, Workbox items and write access.
      /// </summary>
      public class Workflow : Sitecore.Workflows.Simple.Workflow, IWorkflow
      {
          private const string CheckRequiredFieldName = "Check required";

          /// <summary>
          /// Creates a workflow bound to the given provider.
          /// </summary>
          /// <param name="workflowId">Workflow definition ID.</param>
          /// <param name="owner">Provider that owns this workflow.</param>
          public Workflow(string workflowId, WorkflowProvider owner)
              : base(workflowId, owner)
          {
              Owner = owner;
          }

          /// <summary>
          /// Returns the commands of the workflow state the item is currently in.
          /// </summary>
          /// <param name="item">Content item.</param>
          /// <returns>Available commands; an empty array when the item has no workflow state.</returns>
          public override WorkflowCommand[] GetCommands(Item item)
          {
              Assert.ArgumentNotNull(item, "item");
              string stateID = GetStateID(item);
              if (string.IsNullOrEmpty(stateID))
              {
                  return new WorkflowCommand[0];
              }
              return GetCommands(stateID, item);
          }

          /// <summary>
          /// Returns the commands of a workflow state that the context user may execute.
          /// When the state has "Check required" selected, the user also needs
          /// workflowState:write on <paramref name="item"/>.
          /// </summary>
          /// <param name="stateId">Workflow state ID.</param>
          /// <param name="item">Content item.</param>
          /// <returns>Available commands; an empty array when the state cannot be resolved.</returns>
          public WorkflowCommand[] GetCommands(string stateId, Item item)
          {
              Assert.ArgumentNotNullOrEmpty(stateId, "stateId");
              Item stateItem = GetStateItem(stateId);
              WorkflowState workflowState = GetState(stateId);
              if (stateItem == null || workflowState == null)
              {
                  return new WorkflowCommand[0];
              }

              var commands = new System.Collections.Generic.List<WorkflowCommand>();

              // The item-level check does not depend on the command, so it is evaluated at most
              // once, and only when a command actually reaches that stage.
              bool? itemWriteAllowed = null;

              foreach (Item entity in stateItem.Children.ToArray())
              {
                  if (entity == null)
                  {
                      continue;
                  }

                  Template template = entity.Database.Engines.TemplateEngine.GetTemplate(entity.TemplateID);
                  if (template == null || !template.DescendsFromOrEquals(TemplateIDs.WorkflowCommand))
                  {
                      continue;
                  }

                  if (!AuthorizationManager.IsAllowed(entity, AccessRight.WorkflowCommandExecute, Context.User))
                  {
                      continue;
                  }

                  if (workflowState.CheckRequired)
                  {
                      if (!itemWriteAllowed.HasValue)
                      {
                          itemWriteAllowed = AuthorizationManager.IsAllowed(item, AccessRight.WorkflowStateWrite, Context.User);
                      }
                      if (!itemWriteAllowed.Value)
                      {
                          continue;
                      }
                  }

                  commands.Add(new WorkflowCommand(entity.ID.ToString(), entity.DisplayName,
                                                   entity.Appearance.Icon, false,
                                                   entity["suppress comment"] == "1"));
              }

              return commands.ToArray();
          }

          /// <summary>
          /// Returns the workflow state definition item, read with security checks disabled.
          /// </summary>
          /// <param name="stateId">Workflow state ID.</param>
          /// <returns>The state item, or null when the ID cannot be parsed or the item does not exist.</returns>
          protected Item GetStateItem(string stateId)
          {
              ID iD = MainUtil.GetID(stateId, null);
              if (iD == (ID)null)
              {
                  return null;
              }
              // Use the parsed ID rather than the raw string.
              return ItemManager.GetItem(iD, Language.Current, Version.Latest, Owner.Database, SecurityCheck.Disable);
          }

          /// <summary>
          /// Returns the ID of the workflow state the item is in.
          /// </summary>
          /// <param name="item">Content item.</param>
          /// <returns>The state ID, or an empty string when the item has no workflow info.</returns>
          protected string GetStateID(Item item)
          {
              Assert.ArgumentNotNull(item, "item");
              WorkflowInfo workflowInfo = item.Database.DataManager.GetWorkflowInfo(item);
              if (workflowInfo != null)
              {
                  return workflowInfo.StateID;
              }
              return string.Empty;
          }

          /// <summary>
          /// Returns the items in a workflow state. For states with "Check required" selected the
          /// result is filtered by the item-level workflowState:write right, so that the Workbox
          /// respects it; other states use the base implementation.
          /// </summary>
          /// <param name="stateId">Workflow state ID.</param>
          /// <returns>URIs of the items in the state that the context user may see.</returns>
          public override DataUri[] GetItems(string stateId)
          {
              Assert.ArgumentNotNullOrEmpty(stateId, "stateId");
              if (!CheckStateAdvancedSecurity(stateId))
              {
                  return base.GetItems(stateId);
              }

              Assert.IsTrue(ID.IsID(stateId), "Invalid state ID: " + stateId);
              DataUri[] itemsInWorkflowState =
                  Owner.Database.DataManager.GetItemsInWorkflowState(new WorkflowInfo(WorkflowID, stateId));
              return ApplyAdvancedSecurity(itemsInWorkflowState, stateId);
          }

          /// <summary>
          /// Indicates if advanced security should be checked for a workflow state.
          /// </summary>
          /// <param name="stateId">Workflow state ID.</param>
          /// <returns>True when the state exists and has "Check required" selected.</returns>
          protected bool CheckStateAdvancedSecurity(string stateId)
          {
              WorkflowState workflowState = GetState(stateId);
              return workflowState != null && workflowState.CheckRequired;
          }

          /// <summary>
          /// Filters out items that the context user has no workflowState:write right on.
          /// </summary>
          /// <param name="items">DataUri array of content items.</param>
          /// <param name="stateId">Workflow state ID.</param>
          /// <returns>The permitted items; never null.</returns>
          protected DataUri[] ApplyAdvancedSecurity(DataUri[] items, string stateId)
          {
              if (items == null || items.Length == 0)
              {
                  return new DataUri[0];
              }
              if (GetState(stateId) == null)
              {
                  return new DataUri[0];
              }

              // Load each item once and materialize the query a single time.
              return items.Where(uri =>
                  {
                      Item candidate = Owner.Database.GetItem(uri);
                      return candidate != null &&
                             AuthorizationManager.IsAllowed(candidate, AccessRight.WorkflowStateWrite, Context.User);
                  }).ToArray();
          }

          /// <summary>
          /// Returns an extended WorkflowState object that carries the "Check required" flag.
          /// Hides the base class method of the same name.
          /// </summary>
          /// <param name="stateId">Workflow state ID.</param>
          /// <returns>The state, or null when the state item cannot be found.</returns>
          protected new WorkflowState GetState(string stateId)
          {
              Assert.ArgumentNotNullOrEmpty(stateId, "stateId");
              Item stateItem = GetStateItem(stateId);
              if (stateItem == null)
              {
                  return null;
              }
              return new WorkflowState(stateId, stateItem.DisplayName, stateItem.Appearance.Icon,
                                       stateItem[WorkflowFieldIDs.FinalState] == "1",
                                       stateItem[CheckRequiredFieldName] == "1");
          }

          /// <summary>
          /// Resolves access to the item for the given right. Write access is resolved by
          /// <see cref="GetWriteAccessInformation"/>; every other right is delegated to the base class.
          /// Hides the base class method of the same name.
          /// </summary>
          /// <param name="item">Content item.</param>
          /// <param name="account">User account.</param>
          /// <param name="accessRight">Access right.</param>
          /// <returns>The access result; Allow when the state definition item is not found.</returns>
          public new AccessResult GetAccess(Item item, Account account, AccessRight accessRight)
          {
              Assert.ArgumentNotNull(item, "item");
              Assert.ArgumentNotNull(account, "account");
              Assert.ArgumentNotNull(accessRight, "accessRight");
              Item stateItem = GetStateItem(item);
              if (stateItem == null)
              {
                  // Report the right that was actually requested in the explanation.
                  return new AccessResult(AccessPermission.Allow, new AccessExplanation(item, account, accessRight, "The workflow state definition item not found.", new object[0]));
              }
              if (accessRight == AccessRight.ItemWrite)
              {
                  return GetWriteAccessInformation(item, account, stateItem);
              }
              return base.GetAccess(item, account, accessRight);
          }

          /// <summary>
          /// Resolves whether the account has write access to the item. The account always needs
          /// workflowState:write on the state item; when the state has "Check required" selected
          /// it needs the same right on the content item as well.
          /// </summary>
          /// <param name="item">Content item.</param>
          /// <param name="account">User account.</param>
          /// <param name="stateItem">Workflow state item.</param>
          /// <returns>Allow or Deny together with an explanation.</returns>
          protected AccessResult GetWriteAccessInformation(Item item, Account account, Item stateItem)
          {
              WorkflowState workflowState = GetState(stateItem.ID.ToString());

              bool allowed = AuthorizationManager.IsAllowed(stateItem, AccessRight.WorkflowStateWrite, account);
              if (allowed && workflowState != null && workflowState.CheckRequired)
              {
                  allowed = AuthorizationManager.IsAllowed(item, AccessRight.WorkflowStateWrite, account);
              }

              if (allowed)
              {
                  return new AccessResult(AccessPermission.Allow, new AccessExplanation(item, account, AccessRight.ItemWrite, string.Format("The workflow state definition item allows writing (through the '{0}' access right).", AccessRight.WorkflowStateWrite.Name), new object[0]));
              }
              return new AccessResult(AccessPermission.Deny, new AccessExplanation(item, account, AccessRight.ItemWrite, string.Format("The workflow state definition item does not allow writing. To allow writing, grant the '{0}' access right to the workflow state definition item.", AccessRight.WorkflowStateWrite.Name), new object[0]));
          }

          /// <summary>
          /// Returns the workflow state item the content item is in.
          /// </summary>
          /// <param name="item">Content item.</param>
          /// <returns>The state item, or null when the item has no workflow info.</returns>
          protected Item GetStateItem(Item item)
          {
              Assert.ArgumentNotNull(item, "item");
              WorkflowInfo info = item.Database.DataManager.GetWorkflowInfo(item);
              if (info != null)
              {
                  return item.Database.SelectSingleItem(info.StateID);
              }
              return null;
          }

          #region Properties

          /// <summary>
          /// Provider that created this workflow; its database is used for state and item lookups.
          /// </summary>
          protected WorkflowProvider Owner { get; set; }

          #endregion Properties
      }
  }

To make it possible to choose whether access to a workflow state should be combined with access to a content item, I extended the System/Workflow/State template with a checkbox field that indicates whether the custom logic should be triggered. Here is how it looks now:

image

I extended WorkflowState class with an appropriate property for the new field.

Code Snippet
  namespace TwinPeaks.Workflows
  {
      /// <summary>
      /// Workflow state that additionally carries the "check required" flag.
      /// </summary>
      public class WorkflowState : Sitecore.Workflows.WorkflowState
      {
          private readonly bool _checkRequired;

          /// <summary>
          /// Creates a state; the first four arguments are passed to the base class unchanged.
          /// </summary>
          /// <param name="stateId">Workflow state ID.</param>
          /// <param name="displayName">Display name of the state.</param>
          /// <param name="icon">Icon of the state.</param>
          /// <param name="finalState">Whether this is a final state.</param>
          /// <param name="checkRequired">Value exposed through <see cref="CheckRequired"/>.</param>
          public WorkflowState(string stateId, string displayName, string icon, bool finalState, bool checkRequired)
              : base(stateId, displayName, icon, finalState)
          {
              _checkRequired = checkRequired;
          }

          /// <summary>
          /// Indicates if workflowState:write access right should be considered while resolving access to the item.
          /// </summary>
          public bool CheckRequired
          {
              get { return _checkRequired; }
          }
      }
  }

Now in order to make Sitecore use our new Workflow class we need to override WorkflowProvider to return our extended Workflow instance.

Code Snippet
  using Sitecore;
  using Sitecore.Data;
  using Sitecore.Data.Items;
  using Sitecore.Diagnostics;
  using Sitecore.Workflows;

  namespace TwinPeaks.Workflows
  {
      /// <summary>
      /// Workflow provider whose lookup methods hand out instances of the extended
      /// <see cref="Workflow"/> class.
      /// </summary>
      public class WorkflowProvider : Sitecore.Workflows.Simple.WorkflowProvider
      {
          public WorkflowProvider(string databaseName, HistoryStore historyStore)
              : base(databaseName, historyStore)
          {
          }

          /// <summary>
          /// Returns the workflow the item is in, or null when the item has no workflow ID.
          /// </summary>
          public override IWorkflow GetWorkflow(Item item)
          {
              Assert.ArgumentNotNull(item, "item");
              string id = GetWorkflowID(item);
              return id.Length > 0 ? new Workflow(id, this) : null;
          }

          /// <summary>
          /// Returns the workflow with the given ID, or null when no such item exists
          /// in the provider's database.
          /// </summary>
          public override IWorkflow GetWorkflow(string workflowID)
          {
              Assert.ArgumentNotNullOrEmpty(workflowID, "workflowID");
              Error.Assert(ID.IsID(workflowID), "The parameter 'workflowID' must be parseable to an ID");
              Item definition = this.Database.Items[ID.Parse(workflowID)];
              if (definition == null)
              {
                  return null;
              }
              return new Workflow(workflowID, this);
          }

          /// <summary>
          /// Reads the workflow ID from the item's workflow info; empty string when there is none.
          /// </summary>
          private static string GetWorkflowID(Item item)
          {
              Assert.ArgumentNotNull(item, "item");
              WorkflowInfo info = item.Database.DataManager.GetWorkflowInfo(item);
              return info != null ? info.WorkflowID : string.Empty;
          }

          /// <summary>
          /// Returns one workflow per child of the workflow root item; an empty array
          /// when the root item is missing.
          /// </summary>
          public override IWorkflow[] GetWorkflows()
          {
              Item root = this.Database.Items[ItemIDs.WorkflowRoot];
              if (root == null)
              {
                  return new IWorkflow[0];
              }

              Item[] definitions = root.Children.ToArray();
              IWorkflow[] workflows = new IWorkflow[definitions.Length];
              int index = 0;
              foreach (Item definition in definitions)
              {
                  workflows[index] = new Workflow(definition.ID.ToString(), this);
                  index++;
              }
              return workflows;
          }
      }
  }

Third. Configure Sitecore solution to work with this customization. Below is a complete example of UniversalWorkflow.config file that could be placed into /App_Config/Include folder to enable this customization:

Code Snippet
  1. <configuration xmlns:patch="http://www.sitecore.net/xmlconfig/">
  2.   <sitecore>
  3.     <databases>
  4.       <database id="master">
  5.         <workflowProvider>
  6.           <patch:attribute name="type">TwinPeaks.Workflows.WorkflowProvider, TwinPeaks.Workflows</patch:attribute>
  7.         </workflowProvider>
  8.       </database>
  9.     </databases>
  10.     <accessRights defaultProvider="config">
  11.       <rules>
  12.         <add prefix="workflowState:write" typeName="Sitecore.Data.Items.Item"/>
  13.       </rules>
  14.     </accessRights>
  15.  
  16.   </sitecore>
  17. </configuration>

Why is this solution worth blogging about? Because it allows us to address all the requirements by customizing only one thing – the Workflow class. Both Workbox and Content Editor will respect the security configuration if the “check required” field is selected on a workflow state item.

Feel free to share your thoughts on this approach, suggest improvements, or propose an even better solution.
Hope you find it helpful.

Wednesday, October 20, 2010

Sitecore Lucene index does not remove old data

It looks like interest in Sitecore’s implementation of the Lucene index has risen since the Dreamcore event, and developers have run into an issue with old data being kept in the index repository. In this article I want to show you how to work around this issue.
First of all, let’s see why it’s happening. I ran into this issue when I started playing with the new implementation of the Lucene index in Sitecore 6. When I created an output of the results, I saw duplicates of my data in there. I started debugging my code and found that Lucene somehow recognizes raw GUIDs, which breaks the search criteria that Sitecore uses to find items during the update/delete procedure.
To solve this issue I had to create additional field for Lucene index (_shorttemplateid) and store there short GUID for an item (item.ID.ToShortID()). Then override AddMatchCriteria method and dependent properties to use short template GUID for matching criteria. Below is the code example.

Code Snippet
  1. namespace LuceneExamples
  2. {
  3.    public class DatabaseCrawler : Sitecore.Search.Crawlers.DatabaseCrawler
  4.    {
  5.       #region Fields
  6.  
  7.       private bool _hasIncludes;
  8.       private bool _hasExcludes;
  9.       private Dictionary<string, bool> _templateFilter;
  10.       private ArrayList _customFields;
  11.  
  12.       #endregion Fields
  13.  
  14.       #region ctor
  15.  
  16.       public DatabaseCrawler()
  17.       {
  18.          _templateFilter = new Dictionary<string, bool>();
  19.          _customFields = new ArrayList();
  20.       }
  21.  
  22.       #endregion ctor
  23.  
  24.       #region Base class methods
  25.  
  /// <summary>
  /// Adds the item's fields to the Lucene document. Overridden so that date fields are added
  /// in "yyyyMMddHHmmss" format (otherwise range queries on date values are not possible),
  /// and so that the _shorttemplateid field (template id in ShortID format) and the
  /// configured custom fields are added as well.
  /// </summary>
  /// <param name="document">Lucene document</param>
  /// <param name="item">Sitecore data item</param>
  /// <param name="versionSpecific">Only fields whose Shared flag differs from this value are added.</param>
  protected override void AddAllFields(Document document, Item item, bool versionSpecific)
  {
     Assert.ArgumentNotNull(document, "document");
     Assert.ArgumentNotNull(item, "item");

     Sitecore.Collections.FieldCollection allFields = item.Fields;
     allFields.ReadAll();

     foreach (Sitecore.Data.Fields.Field current in allFields)
     {
        if (string.IsNullOrEmpty(current.Key) || current.Shared == versionSpecific)
        {
           continue;
        }

        bool isText = base.IsTextField(current);

        if (IndexAllFields)
        {
           bool isDate = current.TypeKey == "date" || current.TypeKey == "datetime";
           if (isDate)
           {
              IndexDateFields(document, current.Key, current.Value);
           }
           else
           {
              document.Add(CreateField(current.Key, current.Value, isText, 1f));
           }
        }

        // Text fields also contribute to the combined content field.
        if (isText)
        {
           document.Add(CreateField(BuiltinFields.Content, current.Value, true, 1f));
        }
     }

     AddShortTemplateId(document, item);
     AddCustomFields(document, item);
  }
  59.  
  /// <summary>
  /// Adds one Lucene field to the document for every registered custom field,
  /// using the value the custom field produces for the item.
  /// </summary>
  /// <param name="document">Lucene document</param>
  /// <param name="item">Sitecore data item</param>
  private void AddCustomFields(Document document, Item item)
  {
     foreach (CustomField custom in _customFields)
     {
        string name = custom.LuceneFieldName;
        string value = custom.GetFieldValue(item);
        Fieldable luceneField = CreateField(name, value, custom.StorageType, custom.IndexType, Boost);
        document.Add(luceneField);
     }
  }
  72.  
  /// <summary>
  /// Builds a Lucene field with the given storage and index options and applies the boost to it.
  /// </summary>
  /// <param name="fieldKey">Field name</param>
  /// <param name="fieldValue">Field value</param>
  /// <param name="storeType">Storage option</param>
  /// <param name="indexType">Index type</param>
  /// <param name="boost">Boosting parameter</param>
  /// <returns>The created field.</returns>
  private Fieldable CreateField(string fieldKey, string fieldValue, Field.Store storeType, Field.Index indexType, float boost)
  {
     Field created = new Field(fieldKey, fieldValue, storeType, indexType);
     created.SetBoost(boost);
     return created;
  }
  88.  
  /// <summary>
  /// Turns a configuration entry into a custom field and stores it in the custom field collection.
  /// </summary>
  /// <param name="node">Configuration entry describing the custom field</param>
  /// <exception cref="InvalidOperationException">The entry could not be parsed into a custom field.</exception>
  public void AddCustomField(XmlNode node)
  {
     CustomField parsed = CustomField.ParseConfigNode(node);
     if (parsed != null)
     {
        _customFields.Add(parsed);
        return;
     }

     throw new InvalidOperationException("Could not parse custom field entry: " + node.OuterXml);
  }
  102.  
  // Builds the match criteria on the _shorttemplateid field so that combined/boolean search queries
  // can reference template ids. The same criteria are used to match documents for update/delete actions.
  protected override void AddMatchCriteria(BooleanQuery query)
  {
     Term databaseTerm = new Term(BuiltinFields.Database, Database);
     query.Add(new TermQuery(databaseTerm), BooleanClause.Occur.MUST);

     Term rootTerm = new Term(BuiltinFields.Path, Sitecore.Data.ShortID.Encode(Root).ToLowerInvariant());
     query.Add(new TermQuery(rootTerm), BooleanClause.Occur.MUST);

     // Without any template filter the database and root clauses are the whole criteria.
     if (!HasIncludes && !HasExcludes)
     {
        return;
     }

     foreach (KeyValuePair<string, bool> entry in TemplateFilter)
     {
        string shortTemplateId = Sitecore.Data.ShortID.Encode(entry.Key).ToLowerInvariant();
        // true marks an included template, false an excluded one.
        BooleanClause.Occur occurrence = entry.Value ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST_NOT;
        query.Add(new TermQuery(new Term(Constants.ShortTemplate, shortTemplateId)), occurrence);
     }
  }
  117.  
  // Overridden because this class keeps its own _hasIncludes and _hasExcludes state.
  // An item matches when it lies under the root item and passes the template filter.
  protected override bool IsMatch(Item item)
  {
     Assert.ArgumentNotNull(item, "item");

     if (!RootItem.Axes.IsAncestorOf(item))
     {
        return false;
     }

     if (!(HasIncludes || HasExcludes))
     {
        return true;
     }

     bool included;
     bool listed = TemplateFilter.TryGetValue(item.TemplateID.ToString(), out included);

     // A template missing from the filter matches only when no includes were configured.
     return listed ? included : !HasIncludes;
  }
  137.  
  // Hides the base method so that includes are recorded in this class's own filter state,
  // which the AddMatchCriteria override reads.
  public new void IncludeTemplate(string templateId)
  {
     Assert.ArgumentNotNullOrEmpty(templateId, "templateId");
     HasIncludes = true;
     TemplateFilter[templateId] = true;
  }
  145.  
  // Hides the base method so that excludes are recorded in this class's own filter state,
  // which the AddMatchCriteria override reads.
  public new void ExcludeTemplate(string templateId)
  {
     Assert.ArgumentNotNullOrEmpty(templateId, "templateId");
     HasExcludes = true;
     TemplateFilter[templateId] = false;
  }
  153.  
  154.       #endregion Base class methods
  155.  
  /// <summary>
  /// Converts a Sitecore date or datetime field value to the format defined by
  /// Constants.DateTimeFormat and adds it to the document as an untokenized field.
  /// </summary>
  /// <param name="doc">Lucene document object</param>
  /// <param name="fieldKey">Field name</param>
  /// <param name="fieldValue">Field value in Sitecore ISO date format</param>
  private void IndexDateFields(Document doc, string fieldKey, string fieldValue)
  {
     DateTime dateTime = Sitecore.DateUtil.IsoDateToDateTime(fieldValue);
     string luceneDate = string.Empty;

     // An empty value is indexed when the conversion yields DateTime.MinValue.
     if (dateTime != DateTime.MinValue)
     {
        // The index value is machine-readable, so it is formatted with the invariant culture;
        // the current thread culture could otherwise change the calendar or digits written to the index.
        luceneDate = dateTime.ToString(Constants.DateTimeFormat, System.Globalization.CultureInfo.InvariantCulture);
     }

     doc.Add(CreateField(fieldKey, luceneDate, false, 1f));
  }
  172.  
  /// <summary>
  /// Adds the item's template id to the document as a lower-cased ShortID in the Constants.ShortTemplate field.
  /// </summary>
  /// <param name="doc">Lucene document object</param>
  /// <param name="item">Sitecore item whose template id is indexed</param>
  private void AddShortTemplateId(Document doc, Item item)
  {
     string shortTemplateId = Sitecore.Data.ShortID.Encode(item.TemplateID).ToLowerInvariant();
     doc.Add(CreateField(Constants.ShortTemplate, shortTemplateId, false, 1f));
  }
  182.  
  183.       #region Properties
  184.  
  /// <summary>
  /// Gets or sets the flag stored in _hasIncludes, which IncludeTemplate switches on.
  /// </summary>
  protected bool HasIncludes
  {
     get { return _hasIncludes; }
     set { _hasIncludes = value; }
  }
  196.  
  /// <summary>
  /// Gets or sets the flag stored in _hasExcludes, which ExcludeTemplate switches on.
  /// </summary>
  protected bool HasExcludes
  {
     get { return _hasExcludes; }
     set { _hasExcludes = value; }
  }
  208.  
  /// <summary>
  /// Gets the template filter map: template id mapped to true (included) or false (excluded).
  /// </summary>
  protected Dictionary<string, bool> TemplateFilter
  {
     get { return _templateFilter; }
  }
  216.  
  /// <summary>
  /// Gets the item identified by Root from the database named by Database. The item is looked up on
  /// every access, in the invariant language and latest version, with security checks disabled.
  /// </summary>
  protected Item RootItem
  {
     get
     {
        Sitecore.Data.Database database = Sitecore.Data.Database.GetDatabase(Database);
        return Sitecore.Data.Managers.ItemManager.GetItem(
           Root,
           Sitecore.Globalization.Language.Invariant,
           Sitecore.Data.Version.Latest,
           database,
           Sitecore.SecurityModel.SecurityCheck.Disable);
     }
  }
  227.  
  228.       #endregion Properties
  229.  
  230.    }
  231. }

This should solve the issue and also index Sitecore date and datetime fields in a format that Lucene can work with. In addition, it makes it possible to build combined and Boolean search queries.

Update. Code for the Constants class:

   namespace LuceneExamples
   {
      /// <summary>
      /// Constants shared by the Lucene examples.
      /// </summary>
      public class Constants
      {
         /// <summary>Name of the special index field that holds the template id in ShortID format.</summary>
         public const string ShortTemplate = "_shorttemplateid";

         /// <summary>Searchable date-time format used for date and datetime field values.</summary>
         public const string DateTimeFormat = "yyyyMMddHHmmss";

         /// <summary>Id of the Lucene settings item: /sitecore/system/Settings/Lucene.</summary>
         public const string LuceneSettingsPath = "{89783047-026C-45B5-AB5B-338E4A22446C}";
      }
   }


Hope it saves someone a minute or two.