1. 引言

Java集合框架是Java编程中最重要的组成部分之一,它提供了一套标准化的接口和实现,用于存储和操作数据集合。在Java集合框架中,List和Set是两个最常用的集合接口,它们分别代表了有序可重复集合和无序不重复集合。本文将结合代码示例,详细讲解List集合、Set集合的特性、用法以及Set集合的去重原理。

2. List集合详解

2.1 List接口特性

List接口继承自Collection接口,主要特点包括:

  • 有序性:元素按照插入顺序存储,可以通过索引访问
  • 可重复性:允许存储重复元素
  • 索引访问:提供了基于索引的访问方法

2.2 常用List实现类

import java.util.*;

/**
 * Walks through the three classic {@link List} implementations
 * ({@link ArrayList}, {@link LinkedList}, {@link Vector}) and prints their contents.
 */
public class ListExample {
    public static void main(String[] args) {
        // ArrayList: backed by a resizable array; duplicates ("Java") are kept.
        List<String> languages = new ArrayList<>();
        Collections.addAll(languages, "Java", "Python", "Java", "C++");

        System.out.println("ArrayList元素: " + languages);
        System.out.println("索引1处的元素: " + languages.get(1));

        // LinkedList: doubly-linked list. It is declared with the concrete type
        // because addFirst/addLast are not part of the List interface.
        LinkedList<String> fruits = new LinkedList<>();
        Collections.addAll(fruits, "Apple", "Banana", "Orange");
        fruits.addFirst("Mango"); // insert at the head
        fruits.addLast("Grape");  // append at the tail

        System.out.println("LinkedList元素: " + fruits);

        // Vector: synchronized legacy dynamic array, shown here for completeness.
        List<String> legacy = new Vector<>();
        legacy.add("Element1");
        legacy.add("Element2");
        System.out.println("Vector元素: " + legacy);
    }
}

2.3 List常用操作示例

import java.util.*;

/**
 * Shows the four usual ways to traverse a {@link List} and a handful of
 * everyday operations (search, replace, remove, sort, sub-list).
 */
public class ListOperations {
    public static void main(String[] args) {
        List<Integer> values = new ArrayList<>();
        // 20 appears twice on purpose: a List keeps duplicates.
        Collections.addAll(values, 10, 20, 30, 20, 40);

        System.out.println("原始列表: " + values);

        showTraversals(values);
        showCommonOperations(values);
    }

    /** Prints the list once per traversal technique, all on separate lines. */
    private static void showTraversals(List<Integer> values) {
        System.out.println("\n=== 遍历方式 ===");

        // 1. Index-based for loop
        System.out.print("普通for循环: ");
        for (int pos = 0; pos < values.size(); pos++) {
            System.out.print(values.get(pos) + " ");
        }

        // 2. Enhanced for loop
        System.out.print("\n增强for循环: ");
        for (Integer value : values) {
            System.out.print(value + " ");
        }

        // 3. Explicit iterator
        System.out.print("\n迭代器遍历: ");
        for (Iterator<Integer> cursor = values.iterator(); cursor.hasNext(); ) {
            System.out.print(cursor.next() + " ");
        }

        // 4. forEach with a lambda
        System.out.print("\nforEach遍历: ");
        values.forEach(value -> System.out.print(value + " "));
    }

    /** Mutates the list step by step and prints the state after each step. */
    private static void showCommonOperations(List<Integer> values) {
        System.out.println("\n\n=== 常用操作 ===");

        // Search: index of the first occurrence
        System.out.println("元素20第一次出现的位置: " + values.indexOf(20));

        // Replace the element at index 1
        values.set(1, 25);
        System.out.println("修改后列表: " + values);

        // Remove by value: a boxed Integer selects remove(Object), not remove(int index)
        Integer target = 20;
        values.remove(target);
        System.out.println("删除20后: " + values);

        // Sort ascending (natural order)
        Collections.sort(values);
        System.out.println("排序后: " + values);

        // View of indices 1 (inclusive) to 3 (exclusive)
        System.out.println("子列表(1-3): " + values.subList(1, 3));
    }
}

3. Set集合详解

3.1 Set接口特性

Set接口继承自Collection接口,主要特点包括:

  • 无序性:Set接口本身不保证元素的存储顺序(LinkedHashSet保持插入顺序、TreeSet按排序顺序存储除外)
  • 不可重复性:不允许存储重复元素
  • 无索引访问:不能通过索引访问元素

3.2 常用Set实现类

import java.util.*;

/**
 * Contrasts the ordering behaviour of {@link HashSet}, {@link LinkedHashSet}
 * and {@link TreeSet}, including a TreeSet with a custom comparator.
 */
public class SetExample {
    public static void main(String[] args) {
        // HashSet: hash-table based, no ordering guarantee.
        // The second "Apple" is a duplicate and is ignored.
        Set<String> fruits = new HashSet<>();
        Collections.addAll(fruits, "Apple", "Banana", "Orange", "Apple");

        System.out.println("HashSet元素: " + fruits);
        System.out.println("HashSet大小: " + fruits.size());

        // LinkedHashSet: hash table plus linked list, iterates in insertion order.
        // The repeated "First" is ignored.
        Set<String> ordinals = new LinkedHashSet<>();
        Collections.addAll(ordinals, "First", "Second", "Third", "First");

        System.out.println("\nLinkedHashSet元素: " + ordinals);

        // TreeSet: red-black tree, iterates in natural (here alphabetical) order.
        Set<String> animals = new TreeSet<>();
        Collections.addAll(animals, "Zebra", "Apple", "Monkey", "Banana");

        System.out.println("\nTreeSet元素(自动排序): " + animals);

        // TreeSet with an explicit comparator: descending order.
        Set<Integer> descending = new TreeSet<>(Comparator.reverseOrder());
        Collections.addAll(descending, 5, 1, 8, 3);

        System.out.println("倒序TreeSet: " + descending);
    }
}

3.3 Set常用操作示例

import java.util.*;

/**
 * Demonstrates set algebra (union, intersection, difference, symmetric
 * difference, subset test) and the ways to iterate over a {@link Set}.
 */
public class SetOperations {
    public static void main(String[] args) {
        Set<Integer> set1 = new HashSet<>(Arrays.asList(1, 2, 3, 4, 5));
        Set<Integer> set2 = new HashSet<>(Arrays.asList(4, 5, 6, 7, 8));

        System.out.println("Set1: " + set1);
        System.out.println("Set2: " + set2);

        Set<Integer> both = union(set1, set2);
        System.out.println("\n并集: " + both);

        Set<Integer> common = intersection(set1, set2);
        System.out.println("交集: " + common);

        System.out.println("差集(set1 - set2): " + difference(set1, set2));

        // Symmetric difference = union minus intersection
        System.out.println("对称差集: " + difference(both, common));

        // Subset test
        Set<Integer> candidate = new HashSet<>(Arrays.asList(2, 3));
        boolean contained = set1.containsAll(candidate);
        System.out.println("\n" + candidate + " 是 " + set1 + " 的子集吗? " + contained);

        showTraversals(set1);
    }

    /** Returns a new set holding every element of either operand. */
    private static Set<Integer> union(Set<Integer> left, Set<Integer> right) {
        Set<Integer> result = new HashSet<>(left);
        result.addAll(right);
        return result;
    }

    /** Returns a new set holding only the elements present in both operands. */
    private static Set<Integer> intersection(Set<Integer> left, Set<Integer> right) {
        Set<Integer> result = new HashSet<>(left);
        result.retainAll(right);
        return result;
    }

    /** Returns a new set holding the elements of {@code left} that are not in {@code right}. */
    private static Set<Integer> difference(Set<Integer> left, Set<Integer> right) {
        Set<Integer> result = new HashSet<>(left);
        result.removeAll(right);
        return result;
    }

    /** Prints the set three times: enhanced for, explicit iterator, forEach. */
    private static void showTraversals(Set<Integer> items) {
        System.out.println("\n=== Set遍历 ===");
        System.out.print("增强for循环: ");
        for (Integer item : items) {
            System.out.print(item + " ");
        }

        System.out.print("\n迭代器遍历: ");
        for (Iterator<Integer> cursor = items.iterator(); cursor.hasNext(); ) {
            System.out.print(cursor.next() + " ");
        }

        System.out.print("\nforEach遍历: ");
        items.forEach(item -> System.out.print(item + " "));
    }
}

4. Set集合去重原理

4.1 去重的基本原理

基于哈希表的Set(HashSet、LinkedHashSet)的去重机制主要依赖于两个方法:hashCode()和equals();TreeSet则通过compareTo()或Comparator的比较结果判断重复。当向HashSet中添加元素时,会按照以下流程判断是否重复:

1. 调用add(element)方法
2. 计算element.hashCode(),定位到哈希表中对应的位置
3. 判断哈希表中是否存在相同哈希值的元素:
   • 不存在:直接添加元素
   • 存在:调用element.equals()与已存在元素比较
       - equals()返回true:元素重复,不添加
       - equals()返回false:哈希冲突,添加到链表/红黑树

4.2 hashCode()和equals()方法的作用

import java.util.*;

/**
 * Student value type that overrides {@code hashCode} and {@code equals}, so two
 * instances with the same id, name and age count as one element in a hash-based Set.
 */
class Student {
    private final String id;
    private final String name;
    private final int age;

    public Student(String id, String name, int age) {
        this.id = id;
        this.name = name;
        this.age = age;
    }

    /** Hash built from exactly the fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hash(id, name, age);
    }

    /** Two students are equal when id, name and age all match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) return false;
        Student student = (Student) obj;
        return age == student.age &&
               Objects.equals(id, student.id) &&
               Objects.equals(name, student.name);
    }

    @Override
    public String toString() {
        return "Student{id='" + id + "', name='" + name + "', age=" + age + "}";
    }
}

/**
 * Contrasts HashSet de-duplication for a type that does NOT override
 * {@code hashCode}/{@code equals} (duplicates survive) with {@link Student},
 * which overrides both (duplicates collapse).
 */
public class SetDeduplication {

    /**
     * Same fields as {@link Student}, but deliberately inherits the identity-based
     * {@code hashCode}/{@code equals} of {@link Object}. Every instance is therefore
     * distinct to a HashSet, even when all field values match.
     */
    private static final class StudentWithoutOverrides {
        private final String id;
        private final String name;
        private final int age;

        StudentWithoutOverrides(String id, String name, int age) {
            this.id = id;
            this.name = name;
            this.age = age;
        }

        @Override
        public String toString() {
            return "Student{id='" + id + "', name='" + name + "', age=" + age + "}";
        }
    }

    public static void main(String[] args) {
        System.out.println("=== 未重写hashCode和equals的情况 ===");
        Set<StudentWithoutOverrides> badSet = new HashSet<>();

        StudentWithoutOverrides s1 = new StudentWithoutOverrides("001", "张三", 20);
        StudentWithoutOverrides s2 = new StudentWithoutOverrides("001", "张三", 20);
        StudentWithoutOverrides s3 = new StudentWithoutOverrides("002", "李四", 21);

        badSet.add(s1);
        badSet.add(s2); // logically a duplicate of s1, but still added (identity comparison)
        badSet.add(s3);

        System.out.println("错误实现的Set大小: " + badSet.size()); // prints 3; 2 was intended
        System.out.println("Set内容: " + badSet);

        System.out.println("\n=== 重写hashCode和equals的情况 ===");

        // Student overrides hashCode and equals, so field-equal instances collapse.
        Set<Student> goodSet = new HashSet<>();

        Student s4 = new Student("001", "张三", 20);
        Student s5 = new Student("001", "张三", 20); // duplicate of s4, will be rejected
        Student s6 = new Student("002", "李四", 21);

        goodSet.add(s4);
        goodSet.add(s5);
        goodSet.add(s6);

        System.out.println("正确实现的Set大小: " + goodSet.size()); // prints 2
        System.out.println("Set内容: " + goodSet);
    }
}

4.3 不同Set实现类的去重特点

import java.util.*;

/**
 * Feeds the same duplicate-laden data into HashSet, LinkedHashSet and TreeSet
 * to compare the resulting order, then de-duplicates custom {@code Product} objects.
 */
public class SetDeduplicationComparison {
    public static void main(String[] args) {
        // Sample data containing repeated entries
        List<String> rawNames = Arrays.asList(
            "Apple", "Banana", "Orange", "Apple", "Banana", "Grape"
        );
        System.out.println("原始数据(含重复): " + rawNames);

        // 1. HashSet: duplicates removed, no ordering guarantee
        Set<String> unordered = new HashSet<>(rawNames);
        System.out.println("\n1. HashSet去重结果(无序): " + unordered);

        // 2. LinkedHashSet: duplicates removed, insertion order kept
        Set<String> insertionOrdered = new LinkedHashSet<>(rawNames);
        System.out.println("2. LinkedHashSet去重结果(保持顺序): " + insertionOrdered);

        // 3. TreeSet: duplicates removed, elements sorted
        Set<String> sorted = new TreeSet<>(rawNames);
        System.out.println("3. TreeSet去重结果(排序): " + sorted);

        // 4. De-duplicating custom objects
        System.out.println("\n=== 自定义对象去重示例 ===");

        List<Product> catalog = Arrays.asList(
            new Product("P001", "Laptop", 999.99),
            new Product("P002", "Phone", 699.99),
            new Product("P001", "Laptop", 999.99), // duplicate
            new Product("P003", "Tablet", 399.99),
            new Product("P002", "Phone", 699.99)   // duplicate
        );

        // HashSet relies on Product's own equals/hashCode to spot duplicates
        Set<Product> distinctProducts = new HashSet<>(catalog);
        System.out.println("去重后产品数量: " + distinctProducts.size());
        for (Product item : distinctProducts) {
            System.out.println(item);
        }
    }
}

/**
 * Product whose equality and hash code are determined by {@code id} alone;
 * name and price only show up in {@link #toString()}.
 */
class Product {
    private final String id;
    private final String name;
    private final double price;

    public Product(String id, String name, double price) {
        this.id = id;
        this.name = name;
        this.price = price;
    }

    /** Equal when the other object is exactly a Product with the same id. */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (obj.getClass() != getClass()) {
            return false;
        }
        Product that = (Product) obj;
        return Objects.equals(this.id, that.id);
    }

    /** Hash derived from id only, matching {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    @Override
    public String toString() {
        String pattern = "Product{id='%s', name='%s', price=%.2f}";
        return String.format(pattern, id, name, price);
    }
}

4.4 实际应用:使用Set进行数据去重

import java.util.*;
import java.util.stream.Collectors;

/**
 * Three practical de-duplication scenarios: unique e-mail addresses,
 * unique words in a sentence, and unique {@code Employee} objects grouped by department.
 */
public class PracticalDeduplication {
    public static void main(String[] args) {
        dedupeEmails();
        countUniqueWords();
        dedupeEmployees();
    }

    /** Scenario 1: remove repeated entries from a list in three different ways. */
    private static void dedupeEmails() {
        List<String> emails = Arrays.asList(
            "alice@example.com",
            "bob@example.com",
            "alice@example.com", // duplicate
            "charlie@example.com",
            "bob@example.com"    // duplicate
        );

        // Approach 1: HashSet
        Set<String> viaHashSet = new HashSet<>(emails);
        System.out.println("方法1 - HashSet去重: " + viaHashSet);

        // Approach 2: LinkedHashSet keeps first-seen order
        Set<String> viaLinkedHashSet = new LinkedHashSet<>(emails);
        System.out.println("方法2 - LinkedHashSet去重(保持顺序): " + viaLinkedHashSet);

        // Approach 3: Java 8 Stream API
        List<String> viaStream = emails.stream().distinct().collect(Collectors.toList());
        System.out.println("方法3 - Stream去重: " + viaStream);
    }

    /** Scenario 2: count the distinct whitespace-separated words of a sentence. */
    private static void countUniqueWords() {
        String text = "Java is a programming language Java is widely used";
        Set<String> vocabulary = new HashSet<>(Arrays.asList(text.split("\\s+")));

        System.out.println("\n原文: " + text);
        System.out.println("不重复单词数: " + vocabulary.size());
        System.out.println("不重复单词: " + vocabulary);
    }

    /** Scenario 3: de-duplicate custom objects, overall and per department. */
    private static void dedupeEmployees() {
        List<Employee> staff = Arrays.asList(
            new Employee("E001", "张三", "研发部"),
            new Employee("E002", "李四", "市场部"),
            new Employee("E001", "张三", "研发部"), // duplicate
            new Employee("E003", "王五", "研发部"),
            new Employee("E002", "李四", "市场部")  // duplicate
        );

        // HashSet relies on Employee's own equals/hashCode to spot duplicates
        Set<Employee> distinctStaff = new HashSet<>(staff);
        System.out.println("\n员工去重结果:");
        for (Employee member : distinctStaff) {
            System.out.println(member);
        }

        // Group by department, collecting each group into a HashSet so repeats collapse
        Map<String, Set<Employee>> staffByDept = staff.stream()
            .collect(Collectors.groupingBy(
                Employee::getDepartment,
                Collectors.toCollection(HashSet::new)
            ));

        System.out.println("\n按部门分组(已去重):");
        staffByDept.forEach((dept, members) ->
            System.out.println(dept + ": " + members.size() + "人"));
    }
}

/**
 * Employee whose equality and hash code are determined by {@code id} alone;
 * name and department are descriptive data.
 */
class Employee {
    private final String id;
    private final String name;
    private final String department;

    public Employee(String id, String name, String department) {
        this.id = id;
        this.name = name;
        this.department = department;
    }

    /** Returns the department this employee belongs to. */
    public String getDepartment() {
        return department;
    }

    /** Equal when the other object is exactly an Employee with the same id. */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (obj.getClass() != getClass()) {
            return false;
        }
        Employee that = (Employee) obj;
        return Objects.equals(this.id, that.id);
    }

    /** Hash derived from id only, matching {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    @Override
    public String toString() {
        String pattern = "Employee{id='%s', name='%s', dept='%s'}";
        return String.format(pattern, id, name, department);
    }
}

5. Li

更多推荐