locked
Best approach to find duplicate employee from List<T> using LINQ RSS feed

  • Question

  • Suppose I have many duplicate employees in a List, where a duplicate is determined by employee name and location ID. What is the best approach to follow in this scenario? After a Google search I found a few approaches, but I do not understand which one I should use. The approaches are given below.

    code taken from https://stackoverflow.com/questions/18547354/c-sharp-linq-find-duplicates-in-list#:~:text=To%20find%20the%20duplicate%20values,Any(g%20%3D%3E%20g.

    1) 

    // Groups of elements that share the same (Column1, Column2) pair, limited to
    // the groups that contain more than one element.
    var query = someList
        .GroupBy(s => new { s.Column1, s.Column2 })
        .Where(sameKey => sameKey.Count() > 1);

    2)

    // True when at least one Key value is shared by two or more elements.
    var anyDuplicate = enumerable.GroupBy(x => x.Key).Any(g => g.Count() > 1);

    OR

    // True when at least one SAME_ID value is shared by two or more items.
    // Any(predicate) stops at the first duplicated group instead of counting all of them.
    var duplicates = model.list
                        .GroupBy(s => s.SAME_ID)
                        .Any(g => g.Count() > 1);
    if (duplicates) {
        doSomething();
    }

    3)

    /// <summary>
    /// Sample person record used to demonstrate duplicate detection in a list.
    /// </summary>
    public class Person
    {
        public int Id { get; set; }
        public string Name { get; set; }
        public string Surname { get; set; }

        /// <summary>
        /// Creates a person with the given identifier, name and surname.
        /// </summary>
        public Person(int id, string name, string surname)
        {
            Id = id;
            Name = name;
            Surname = surname;
        }
    }
    
    
    /// <summary>
    /// Extension methods that pick out the elements of a sequence sharing a grouping key.
    /// </summary>
    public static class Extention
    {
        /// <summary>
        /// Returns, for every key, all elements except the first one that produced that key,
        /// i.e. only the second and later occurrences.
        /// </summary>
        /// <param name="extList">Sequence to inspect.</param>
        /// <param name="groupProps">Selects the value(s) elements are grouped by.</param>
        public static IEnumerable<T> getMoreThanOnceRepeated<T>(this IEnumerable<T> extList, Func<T, object> groupProps) where T : class
        {
            var byKey = extList.GroupBy(groupProps);

            // Dropping the first member of each group leaves only the repetitions.
            return byKey.SelectMany(sameKey => sameKey.Skip(1));
        }

        /// <summary>
        /// Returns every element whose key is produced by more than one element,
        /// including the first occurrence.
        /// </summary>
        /// <param name="extList">Sequence to inspect.</param>
        /// <param name="groupProps">Selects the value(s) elements are grouped by.</param>
        public static IEnumerable<T> getAllRepeated<T>(this IEnumerable<T> extList, Func<T, object> groupProps) where T : class
        {
            // Keep only the groups that have at least two members...
            var repeatedGroups = extList
                .GroupBy(groupProps)
                .Where(sameKey => sameKey.Count() > 1);

            // ...and flatten them back into a single sequence of elements.
            return repeatedGroups.SelectMany(sameKey => sameKey);
        }
    }
    
    // How to use it:
    void DuplicateExample()
    {
        // Sample data: ids 1, 3 and 5 share the same name and surname.
        var people = new List<Person>
        {
            new Person(1, "Ricardo", "Figueiredo"),   // first duplicate in the example
            new Person(2, "Ana", "Figueiredo"),
            new Person(3, "Ricardo", "Figueiredo"),   // second duplicate in the example
            new Person(4, "Margarida", "Figueiredo"),
            new Person(5, "Ricardo", "Figueiredo")    // third duplicate in the example
        };

        Console.WriteLine("All:");
        foreach (var person in people)
        {
            Console.WriteLine("{0} -> {1} {2}", person.Id, person.Name, person.Surname);
        }
        /* OUTPUT:
            All:
            1 -> Ricardo Figueiredo
            2 -> Ana Figueiredo
            3 -> Ricardo Figueiredo
            4 -> Margarida Figueiredo
            5 -> Ricardo Figueiredo
            */

        Console.WriteLine("All lines with repeated data");
        var everyRepeated = people.getAllRepeated(p => new { p.Name, p.Surname });
        foreach (var person in everyRepeated)
        {
            Console.WriteLine("{0} -> {1} {2}", person.Id, person.Name, person.Surname);
        }
        /* OUTPUT:
            All lines with repeated data
            1 -> Ricardo Figueiredo
            3 -> Ricardo Figueiredo
            5 -> Ricardo Figueiredo
            */

        Console.WriteLine("Only Repeated more than once");
        var laterOccurrences = people.getMoreThanOnceRepeated(p => new { p.Name, p.Surname });
        foreach (var person in laterOccurrences)
        {
            Console.WriteLine("{0} -> {1} {2}", person.Id, person.Name, person.Surname);
        }
        /* OUTPUT:
            Only Repeated more than once
            3 -> Ricardo Figueiredo
            5 -> Ricardo Figueiredo
            */
    }

    5) var unique = list.GroupBy(x => x.Key).All(g => g.Count() == 1); // true only when no Key value occurs more than once

    6) 

    /// <summary>
    /// Builds a sample employee list and writes, for each first name, how many
    /// employees carry it, with the most frequent name first.
    /// </summary>
    private static void Main()
    {
        var empList = new List<Employee>
        {
            new Employee { ID = 1, FName = "John", Age = 23, Sex = 'M' },
            new Employee { ID = 2, FName = "Mary", Age = 25, Sex = 'F' },
            new Employee { ID = 3, FName = "Amber", Age = 23, Sex = 'M' },
            new Employee { ID = 4, FName = "Kathy", Age = 25, Sex = 'M' },
            new Employee { ID = 5, FName = "Lena", Age = 27, Sex = 'F' },
            new Employee { ID = 6, FName = "John", Age = 28, Sex = 'M' },
            new Employee { ID = 7, FName = "Kathy", Age = 27, Sex = 'F' },
            new Employee { ID = 8, FName = "John", Age = 28, Sex = 'M' }
        };

        // Group on the string itself rather than on an anonymous wrapper object:
        // with new { x.FName } as the key, Name would be written as "{ FName = John }"
        // instead of "John". The grouping itself is the same either way.
        var dup = empList
            .GroupBy(x => x.FName)
            .Select(group => new { Name = group.Key, Count = group.Count() })
            .OrderByDescending(x => x.Count);

        // NOTE(review): Response is not declared in the visible code - confirm it is
        // reachable from this static method.
        foreach (var x in dup)
        {
            Response.Write(x.Count + " " + x.Name);
        }
    }
    
    
    // Simple employee record used by the sample Main method.
    class Employee
    {
        public int ID { get; set; }
        public string FName { get; set; } // first name; the sample groups employees by this value
        public int Age { get; set; }
        public char Sex { get; set; } // the sample data uses 'M' and 'F'
    }
    7) 
    // Sample input in which "First" and "Third" each appear twice.
    string[] array = { "First", "Second", "Third", "First", "Third" }; 
    var i= CheckforDuplicates(array);
    
    /// <summary>
    /// Reports whether any string occurs more than once in <paramref name="array"/>.
    /// </summary>
    /// <param name="array">Strings to inspect; values are compared with the default string equality.</param>
    /// <returns><c>true</c> if at least one value is repeated; otherwise <c>false</c>.</returns>
    public bool CheckforDuplicates(string[] array)
    {
        // Any(predicate) stops at the first repeated value, whereas the previous
        // Where/Select/Count() > 0 chain counted every duplicated group first.
        return array
            .GroupBy(p => p)
            .Any(g => g.Count() > 1);
    }
    Which one should I follow? Thanks.
    Thursday, August 27, 2020 10:38 AM

Answers

  • For detecting duplicates by multiple keys.

    If the results are needed beyond the place where the query is executed, project each group into a named class:

    // One PersonGroup per (Name, Surname) pair that occurs more than once.
    // The groups are filtered before projecting, so a PersonGroup and its list
    // are only built for the pairs that really are duplicated.
    var groupResults = PersonsLst
        .GroupBy((person) => new { person.Name, person.Surname })
        .Where((g) => g.Count() > 1)
        .Select((g) => 
            new PersonGroup()
            {
                Count = g.Count(),
                FirstName = g.Key.Name,
                SurName = g.Key.Surname,
                List = g.ToList()
            })
        .ToList();

    Class for above

    /// <summary>
    /// Result row for one group of people sharing the same name and surname.
    /// </summary>
    public class PersonGroup
    {
        /// <summary>Number of people in the group; settable only from within the assembly.</summary>
        public int Count { get; internal set; }
        /// <summary>First name shared by the group.</summary>
        public string FirstName { get; set; }
        /// <summary>Surname shared by the group.</summary>
        public string SurName { get; set; }
        /// <summary>The people that belong to the group.</summary>
        public List<Person> List { get; set; }
    }

    Usage

    // Write each duplicated name followed by the ids of the people sharing it.
    foreach (var personGroup in groupResults)
    {
        var ids = string.Join(",", personGroup.List.Select(member => member.Id));
        Console.WriteLine($"{personGroup.FirstName} {personGroup.SurName} => {ids}");
    }

    Results

    Ricardo Figueiredo => 1,3,5

    Then there is always an option to ignore duplicates

    /// <summary>
    /// Compares <see cref="Person"/> instances by Name and Surname only, ignoring case
    /// under the invariant culture. Id does not take part in the comparison.
    /// </summary>
    public class NameSurnameEqualityComparer : IEqualityComparer<Person>
    {
        /// <summary>
        /// Returns <c>true</c> when both arguments are the same reference (including both
        /// null), or are non-null instances of the same runtime type whose Name and
        /// Surname match case-insensitively.
        /// </summary>
        public bool Equals(Person x, Person y)
        {
            if (ReferenceEquals(x, y)) return true;
            if (ReferenceEquals(x, null)) return false;
            if (ReferenceEquals(y, null)) return false;
            if (x.GetType() != y.GetType()) return false;

            return string.Equals(x.Name, y.Name,
                       StringComparison.InvariantCultureIgnoreCase) &&
                   string.Equals(x.Surname, y.Surname,
                       StringComparison.InvariantCultureIgnoreCase);
        }

        /// <summary>
        /// Returns a hash code built from Name and Surname with the same case-insensitive
        /// invariant-culture comparer used by <see cref="Equals(Person, Person)"/>, so
        /// equal persons always hash alike. A null Name or Surname contributes 0.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="person"/> is null.</exception>
        public int GetHashCode(Person person)
        {
            // IEqualityComparer<T>.GetHashCode is documented to throw ArgumentNullException
            // for a null reference; fail with that instead of a NullReferenceException.
            if (ReferenceEquals(person, null)) throw new ArgumentNullException(nameof(person));

            unchecked
            {
                var nameHash = person.Name != null
                    ? StringComparer.InvariantCultureIgnoreCase.GetHashCode(person.Name)
                    : 0;
                var surnameHash = person.Surname != null
                    ? StringComparer.InvariantCultureIgnoreCase.GetHashCode(person.Surname)
                    : 0;

                // Multiply-then-xor keeps (Name, Surname) and (Surname, Name) from colliding trivially.
                return (nameHash * 397) ^ surnameHash;
            }
        }
    }

    Usage

    // Distinct with the custom comparer leaves one Person per Name/Surname combination.
    var compareResults = PersonsLst.Distinct(new NameSurnameEqualityComparer()).ToList();
    foreach (var person in compareResults)
    {
        // Console.WriteLine(object) prints the value returned by the object's ToString().
        Console.WriteLine(person);
    }

    I added this to Person

    // Formats a person as "Id Name Surname", e.g. "1 Ricardo Figueiredo".
    public override string ToString() => $"{Id} {Name} {Surname}";

    Results

    1 Ricardo Figueiredo
    2 Ana Figueiredo
    4 Margarida Figueiredo


    Please remember to mark the replies as answers if they help and unmark them if they provide no help; this will help others who are looking for solutions to the same or a similar problem. Contact me via my Twitter (Karen Payne) or Facebook (Karen Payne) through my MSDN profile, but I will not answer coding questions on either.

    NuGet BaseConnectionLibrary for database connections.

    StackOverFlow
    profile for Karen Payne on Stack Exchange


    • Edited by KareninstructorMVP Thursday, August 27, 2020 2:05 PM added missing code
    • Proposed as answer by CoolDadTx Thursday, August 27, 2020 3:11 PM
    • Marked as answer by Sudip_inn Thursday, August 27, 2020 5:00 PM
    Thursday, August 27, 2020 12:35 PM