none
LINQ - Identify duplicate rows in DataTable - Values with Space

    Question

  • Hi,

    I am using the LINQ query below to identify duplicate records in a DataTable.

    I call this method from a C# Windows application.

    If I add value without space it is considered as duplicate.   - dt.Rows.Add(2, "John", "Sql");  

    If I add value with space it is not considered as duplicate  - dt.Rows.Add(2, "John", "Sql  ");  

    Actually I want to trim the space and check for duplicates in a Datatable.

    // Builds a sample DataTable, groups its rows on the raw (Id, Name, Skill) values
    // and shows a message box for every combination that occurs more than once.
    // The values are compared exactly as stored, so "Sql" and "Sql  " fall into
    // different groups.
    public void GroupByLinq()
    {
        try
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("Id");
            dt.Columns.Add("Name");
            dt.Columns.Add("Skill");

            dt.Rows.Add(1, "Mark", "Dotnet");
            dt.Rows.Add(2, "John", "Sql");
            dt.Rows.Add(1, "Mark", "WCF");
            dt.Rows.Add(3, "Chris", "MVC");
            dt.Rows.Add(3, "Chris", "WPF");
            dt.Rows.Add(2, "John", "LINQ");
            dt.Rows.Add(2, "John", "Sql");   //without space at the end

            // dt.Rows.Add(2, "John", "Sql  ");   //with space at the end

            // Key every row on its three column values and keep the keys seen more than once.
            var repeatedEntries = dt.AsEnumerable()
                .GroupBy(row => new
                {
                    ID = row.Field<string>("Id"),
                    NAME = row.Field<string>("Name"),
                    SKILL = row.Field<string>("Skill")
                })
                .Where(rowsWithSameKey => rowsWithSameKey.Count() > 1)
                .Select(rowsWithSameKey => new
                {
                    DupID = rowsWithSameKey.Key.ID,
                    DupName = rowsWithSameKey.Key.NAME,
                    DupSkill = rowsWithSameKey.Key.SKILL
                })
                .ToList();

            if (repeatedEntries.Count == 0)
            {
                MessageBox.Show("No duplicate entry");
            }
            else
            {
                // One message box per repeated combination.
                foreach (var entry in repeatedEntries)
                {
                    string description = string.Join(" - ", entry.DupID, entry.DupName, entry.DupSkill);
                    MessageBox.Show("Duplicate entry:" + description);
                }
            }
        }
        catch (Exception ex)
        {
            // Any failure is reported to the user instead of propagating.
            MessageBox.Show(ex.Message);
        }
    }

    How can I identify the duplicate values even if the values have spaces in them?

    How can I detect the spaces in the values and filter the duplicate rows in LINQ?

    Thanks

    Ashok

    Friday, October 18, 2013 1:43 PM

Answers

  • Hi Ashok;

    I have modified your query and some statements after it. This code should give you what you need.

    // Collect every (Id, Name, Skill) combination that occurs more than once in dt.
    // Distinct() is not suitable here: it returns one row per combination, including
    // combinations that occur only once, so every unique row would be reported as a
    // duplicate. Instead the rows are grouped with DuplicateObjectComparer (which
    // ignores case and surrounding spaces) and only groups with more than one member
    // are kept.
    var duplicateValues = dt.AsEnumerable()
        // Sorting first keeps the report ordered by Id; GroupBy yields groups in the
        // order in which their first element appears.
        .OrderBy(row => row.Field<string>("Id"))
        .Select(row => new DuplicateObject
        {
            Id = row.Field<string>("Id"),
            Name = row.Field<string>("Name"),
            Skill = row.Field<string>("Skill")
        })
        .GroupBy(item => item, new DuplicateObjectComparer())
        .Where(grp => grp.Count() > 1)
        // The group key is the first row of the group, reported as stored in the table.
        .Select(grp => grp.Key)
        .ToList();

    string dupValue = string.Empty;
    foreach (var dup in duplicateValues)
    {
        dupValue = dup.Id + " - " + dup.Name + " - " + dup.Skill;
        Console.WriteLine("Duplicate entry:" + dupValue);
    }

    if (duplicateValues.Count == 0)
    {
        Console.WriteLine("No duplicate entry");
    }
    
    // Supporting classes
    // Provides a strongly typed class for the rows selected by the LINQ query
    /// <summary>
    /// Holds the Id, Name and Skill column values of a single DataTable row as strings,
    /// so that rows can be compared with an IEqualityComparer.
    /// </summary>
    public class DuplicateObject
    {
        /// <summary>Value of the row's "Id" column.</summary>
        public string Id { get; set; }
        /// <summary>Value of the row's "Name" column.</summary>
        public string Name { get; set; }
        /// <summary>Value of the row's "Skill" column.</summary>
        public string Skill { get; set; }
    }
    
    // Custom comparer for the DuplicateObject class
    // Compares two objects of type DuplicateObject, ignoring case and surrounding spaces.
    /// <summary>
    /// Equality comparer for <see cref="DuplicateObject"/> that treats two instances as
    /// equal when their Id, Name and Skill match after trimming surrounding white space
    /// and ignoring case (ordinal, culture-independent comparison).
    /// A null property only matches another null property.
    /// </summary>
    class DuplicateObjectComparer : IEqualityComparer<DuplicateObject>
    {
        /// <summary>
        /// Determines whether two DuplicateObject instances describe the same entry.
        /// </summary>
        /// <param name="x">First object to compare; may be null.</param>
        /// <param name="y">Second object to compare; may be null.</param>
        /// <returns>
        /// true when both references are the same or all three properties match after
        /// trimming and ignoring case; false otherwise, including when exactly one
        /// argument is null.
        /// </returns>
        public bool Equals(DuplicateObject x, DuplicateObject y)
        {
            // Same instance (or both null) is trivially equal.
            if (Object.ReferenceEquals(x, y)) return true;

            // Exactly one of the two is null.
            if (Object.ReferenceEquals(x, null) || Object.ReferenceEquals(y, null))
                return false;

            return FieldEquals(x.Id, y.Id)
                && FieldEquals(x.Name, y.Name)
                && FieldEquals(x.Skill, y.Skill);
        }

        /// <summary>
        /// Returns a hash code that is identical for any two objects that
        /// <see cref="Equals(DuplicateObject, DuplicateObject)"/> reports as equal.
        /// </summary>
        /// <param name="dup">Object to hash; may be null.</param>
        /// <returns>0 for a null object, otherwise a combination of the three field hashes.</returns>
        public int GetHashCode(DuplicateObject dup)
        {
            if (Object.ReferenceEquals(dup, null)) return 0;

            // Order-sensitive combination: a plain XOR would give the same hash when
            // values are swapped between fields and would cancel out identical fields.
            // Overflow is intentional and harmless for a hash code.
            unchecked
            {
                int hash = 17;
                hash = hash * 31 + FieldHash(dup.Id);
                hash = hash * 31 + FieldHash(dup.Name);
                hash = hash * 31 + FieldHash(dup.Skill);
                return hash;
            }
        }

        // Compares two field values after trimming, ignoring case; null equals only null.
        private static bool FieldEquals(string left, string right)
        {
            return StringComparer.OrdinalIgnoreCase.Equals(Normalize(left), Normalize(right));
        }

        // Hashes a field value consistently with FieldEquals; a null value hashes to 0.
        private static int FieldHash(string value)
        {
            string normalized = Normalize(value);
            return normalized == null ? 0 : StringComparer.OrdinalIgnoreCase.GetHashCode(normalized);
        }

        // Removes leading and trailing white space; null is passed through unchanged.
        private static string Normalize(string value)
        {
            return value == null ? null : value.Trim();
        }
    }

      


    Fernando (MCSD)

    If a post answers your question, please click "Mark As Answer" on that post and "Mark as Helpful".

    NOTE: If I ask for code, please provide something that I can drop directly into a project and run (including XAML), or an actual application project. I'm trying to help a lot of people, so I don't have time to figure out weird snippets with undefined objects and unknown namespaces.

    Friday, October 18, 2013 6:35 PM