Scala Collections

The Scala collection framework is one of the language's most powerful features, providing rich data structures and a large set of operations on them. Collections fall into two major categories: mutable and immutable.

Collection Hierarchy

Collection Types Overview

/** Side-by-side tour of the default immutable collections and their mutable counterparts. */
object CollectionOverview {
  import scala.collection.mutable

  /** Prints one `label: value` line; `value` is rendered through its `toString`. */
  private def show(label: String, value: Any): Unit =
    println(s"$label: $value")

  def main(args: Array[String]): Unit = {
    // Immutable collections: List, Vector, Set and Map are used here without any import.
    show("List", List(1, 2, 3, 4, 5))
    show("Vector", Vector(1, 2, 3, 4, 5))
    show("Set", Set(1, 2, 3, 4, 5))
    show("Map", Map("a" -> 1, "b" -> 2, "c" -> 3))

    // Mutable variants are referenced through the qualified `mutable` prefix.
    val buffer = mutable.ListBuffer(1, 2, 3)
    val numbers = mutable.Set(1, 2, 3)
    val lookup = mutable.Map("a" -> 1, "b" -> 2)

    // Dumps the current contents of all three mutable collections.
    def showMutableState(): Unit = {
      show("Mutable List", buffer)
      show("Mutable Set", numbers)
      show("Mutable Map", lookup)
    }

    showMutableState()

    // Grow each collection in place; the same instances are printed again below.
    buffer.append(4)
    numbers.add(4)
    lookup.update("d", 4)

    println("After modification:")
    showMutableState()
  }
}

List

List Basic Operations

/** Demonstrates building an immutable `List` and its basic, non-destructive operations. */
object ListOperations {
  /** Prints every `(label, value)` pair as `label: value`, one per line, in the given order. */
  private def report(rows: Seq[(String, Any)]): Unit =
    rows.foreach { case (label, value) => println(s"$label: $value") }

  def main(args: Array[String]): Unit = {
    val base = List(1, 2, 3, 4, 5)

    // Five construction styles: literal, cons cells, range, fill, tabulate.
    report(Seq(
      "list1" -> base,
      "list2" -> (1 :: 2 :: 3 :: 4 :: 5 :: Nil),
      "list3" -> List.range(1, 6),
      "list4" -> List.fill(5)(0),
      "list5" -> List.tabulate(5)(i => i * i)
    ))

    // Structural queries on the literal list.
    report(Seq(
      "Head" -> base.head,
      "Tail" -> base.tail,
      "Last" -> base.last,
      "Init" -> base.init,
      "Length" -> base.length,
      "Is empty" -> base.isEmpty
    ))

    // Each of these yields a new list; `base` is reused unchanged afterwards.
    report(Seq(
      "Prepend 0" -> (0 :: base),
      "Append 6" -> (base :+ 6),
      "Concatenate" -> (base ++ List(6, 7, 8))
    ))

    // Positional access and sub-list extraction.
    report(Seq(
      "Element at index 2" -> base(2),
      "Take 3" -> base.take(3),
      "Drop 2" -> base.drop(2),
      "Slice(1, 4)" -> base.slice(1, 4)
    ))
  }
}

List Higher-Order Functions

/** Walks through the common higher-order functions available on `List`. */
object ListHigherOrderFunctions {
  def main(args: Array[String]): Unit = {
    val numbers = (1 to 10).toList
    val words = List("scala", "java", "python", "javascript")
    val isEven = (n: Int) => n % 2 == 0

    // map: apply a function to every element.
    println(s"Doubled: ${numbers.map(n => n * 2)}")
    println(s"Word lengths: ${words.map(w => w.length)}")

    // filter: keep only the elements that satisfy a predicate.
    println(s"Even numbers: ${numbers.filter(isEven)}")
    println(s"Long words: ${words.filter(w => w.length > 4)}")

    // flatMap: turn each element into a collection, then concatenate the pieces.
    println(s"All characters: ${words.flatMap(w => w.toList)}")
    println(s"Pairs: ${numbers.flatMap(n => List(n, n * 10))}")

    // reduce combines the elements pairwise; fold does the same from an explicit seed.
    val total = numbers.reduce((a, b) => a + b)
    val factorial = numbers.fold(1)((acc, n) => acc * n)
    val largest = numbers.reduce((a, b) => a max b)
    println(s"Sum: $total")
    println(s"Product: $factorial")
    println(s"Max: $largest")

    // scanLeft: like a left fold, but every intermediate accumulator is kept.
    println(s"Running sum: ${numbers.scanLeft(0)((acc, n) => acc + n)}")
    println(s"Running product: ${numbers.scanLeft(1)((acc, n) => acc * n)}")

    // partition splits in two by predicate; groupBy buckets by a computed key.
    val (evenPart, oddPart) = numbers.partition(isEven)
    val byRemainder = numbers.groupBy(n => n % 3)
    println(s"Evens: $evenPart, Odds: $oddPart")
    println(s"Grouped by remainder: $byRemainder")

    // Sorting: natural order, explicit comparator, and by a derived key.
    val unsorted = List(5, 2, 8, 1, 9, 3)
    println(s"Sorted: ${unsorted.sorted}")
    println(s"Sorted descending: ${unsorted.sortWith((a, b) => a > b)}")
    println(s"Sort by length: ${words.sortBy(w => w.length)}")
  }
}

Vector

Vector Characteristics

/** Shows `Vector` construction, indexed access, non-destructive updates and a rough timing against `List`. */
object VectorOperations {
  /**
   * Evaluates `operation` exactly once and discards its result.
   *
   * @return the difference between two `System.nanoTime()` readings taken around the evaluation
   */
  private def elapsedNanos[T](operation: => T): Long = {
    val startedAt = System.nanoTime()
    operation
    System.nanoTime() - startedAt
  }

  def main(args: Array[String]): Unit = {
    // Three ways to build a Vector: literal, range, fill.
    val base = Vector(1, 2, 3, 4, 5)
    val ranged = Vector.range(1, 6)
    val zeros = Vector.fill(5)(0)
    println(s"vector1: $base")
    println(s"vector2: $ranged")
    println(s"vector3: $zeros")

    // Indexed access into a large Vector; the Scala collections guide lists this as effectively constant time.
    val big = Vector.range(1, 1000000)
    println(s"Element at 500000: ${big(500000)}")

    // updated builds a new Vector; printing `base` afterwards shows it was left untouched.
    val patched = base.updated(2, 99)
    println(s"Original: $base")
    println(s"Updated: $patched")

    // Adding at either end also yields new Vectors.
    println(s"Prepended: ${0 +: base}")
    println(s"Appended: ${base :+ 6}")

    // Single-shot timings of List versus Vector on same-sized inputs (no warm-up, one run each).
    val size = 100000
    val asList = List.range(1, size)
    val asVector = Vector.range(1, size)
    val middle = size / 2

    // Both measurements are taken before either is printed.
    val listAccess = elapsedNanos(asList(middle))
    val vectorAccess = elapsedNanos(asVector(middle))
    println(s"List random access: ${listAccess}ns")
    println(s"Vector random access: ${vectorAccess}ns")

    val listPrepend = elapsedNanos(0 :: asList)
    val vectorPrepend = elapsedNanos(0 +: asVector)
    println(s"List prepend: ${listPrepend}ns")
    println(s"Vector prepend: ${vectorPrepend}ns")
  }
}

Set

Set Operations

/** Demonstrates immutable `Set` basics, set algebra, subset checks, and the mutable and sorted variants. */
object SetOperations {
  def main(args: Array[String]): Unit = {
    // Create sets; repeated elements in the literal are stored only once.
    val set1 = Set(1, 2, 3, 4, 5)
    val set2 = Set(4, 5, 6, 7, 8)
    val set3 = Set(1, 1, 2, 2, 3, 3)

    println(s"set1: $set1")
    println(s"set2: $set2")
    println(s"set3 (duplicates removed): $set3")

    // Basic queries
    println(s"Contains 3: ${set1.contains(3)}")
    println(s"Size: ${set1.size}")
    println(s"Is empty: ${set1.isEmpty}")

    // Adding and removing produce new sets; set1 itself is reused unchanged below.
    val added = set1 + 6
    val removed = set1 - 3
    val multipleAdded = set1 ++ Set(6, 7, 8)
    val multipleRemoved = set1 -- Set(1, 2)

    println(s"Added 6: $added")
    println(s"Removed 3: $removed")
    println(s"Multiple added: $multipleAdded")
    println(s"Multiple removed: $multipleRemoved")

    // Set algebra (operator aliases: | for union, & for intersect, &~ for diff)
    val union = set1 union set2
    val intersection = set1 intersect set2
    val difference = set1 diff set2

    println(s"Union: $union")
    println(s"Intersection: $intersection")
    println(s"Difference: $difference")

    // Subset and superset.
    // "set1 is a superset of subset" is the same statement as "subset is a subset of set1",
    // so both lines use subset.subsetOf(set1); the reversed call would test the opposite claim.
    val subset = Set(1, 2, 3)
    println(s"$subset is subset of $set1: ${subset.subsetOf(set1)}")
    println(s"$set1 is superset of $subset: ${subset.subsetOf(set1)}")

    // Mutable set: += and -= change the instance in place.
    import scala.collection.mutable
    val mutableSet = mutable.Set(1, 2, 3)
    mutableSet += 4
    mutableSet -= 1
    println(s"Mutable set: $mutableSet")

    // SortedSet keeps its elements in ascending order regardless of insertion order.
    import scala.collection.immutable.SortedSet
    val sortedSet = SortedSet(5, 1, 3, 2, 4)
    println(s"Sorted set: $sortedSet")
  }
}

Map (Mapping)

Map Basic Operations

/** Demonstrates creating, querying and non-destructively updating an immutable `Map`. */
object MapOperations {
  def main(args: Array[String]): Unit = {
    // Entries can be written with the arrow syntax or as plain tuples.
    val letters = Map("a" -> 1, "b" -> 2, "c" -> 3)
    val viaTuples = Map(("x", 10), ("y", 20), ("z", 30))
    val nothing = Map.empty[String, Int]
    println(s"map1: $letters")
    println(s"map2: $viaTuples")
    println(s"map3: $nothing")

    // Direct lookup, Option-returning lookup, and lookup with a fallback value.
    val direct = letters("a")
    val maybeA = letters.get("a")
    val maybeD = letters.get("d")
    val dOrZero = letters.getOrElse("d", 0)
    println(s"Value for 'a': $direct")
    println(s"Get 'a': $maybeA")
    println(s"Get 'd': $maybeD")
    println(s"Get 'd' with default: $dOrZero")

    // Key membership
    val hasB = letters.contains("b")
    val hasD = letters.contains("d")
    println(s"Contains 'b': $hasB")
    println(s"Contains 'd': $hasD")

    // Each operator returns a new map; `letters` is reused unchanged afterwards.
    val withD = letters + ("d" -> 4)
    val withDAndE = letters ++ Map("d" -> 4, "e" -> 5)
    val withoutA = letters - "a"
    val withoutAAndB = letters -- List("a", "b")
    println(s"Updated: $withD")
    println(s"Multiple updated: $withDAndE")
    println(s"Removed: $withoutA")
    println(s"Multiple removed: $withoutAAndB")

    // Projections of the map's contents
    val allKeys = letters.keys
    val allValues = letters.values
    val entries = letters.toList
    println(s"Keys: $allKeys")
    println(s"Values: $allValues")
    println(s"Key-value pairs: $entries")

    // Transforming and filtering entries
    val twice = letters.map { case (key, value) => (key, value * 2) }
    val aboveOne = letters.filter { case (_, value) => value > 1 }
    println(s"Doubled values: $twice")
    println(s"Filtered (value > 1): $aboveOne")
  }
}

Map Advanced Operations

/** Works through aggregate lookups, grouping, merging, nesting and in-place edits on maps. */
object AdvancedMapOperations {
  /** Letter grade for a score: 90 and up is "A", 80 and up "B", 70 and up "C", anything lower "F". */
  private def letterGrade(score: Int): String =
    if (score >= 90) "A"
    else if (score >= 80) "B"
    else if (score >= 70) "C"
    else "F"

  def main(args: Array[String]): Unit = {
    import scala.collection.mutable

    val scores = Map("Alice" -> 95, "Bob" -> 87, "Charlie" -> 92, "Diana" -> 78)

    // Extremes are selected by the score component of each entry.
    val best = scores.maxBy { case (_, score) => score }
    val worst = scores.minBy { case (_, score) => score }
    val mean = scores.values.sum.toDouble / scores.size
    println(s"Top student: $best")
    println(s"Bottom student: $worst")
    println(s"Average score: $mean")

    // Bucket the entries by letter grade, then list the names in each bucket.
    val byGrade = scores.groupBy { case (_, score) => letterGrade(score) }
    println("Grade distribution:")
    for ((grade, students) <- byGrade) {
      val names = students.keys.mkString(", ")
      println(s"Grade $grade: $names")
    }

    // Merge: each bonus is added to the existing score, or to 0 for a name not in `scores`.
    val bonusPoints = Map("Alice" -> 5, "Bob" -> 3, "Eve" -> 10)
    val boosted = bonusPoints.map { case (name, bonus) =>
      (name, scores.getOrElse(name, 0) + bonus)
    }
    val finalScores = scores ++ boosted
    println(s"Final scores: $finalScores")

    // Nested map: the outer key selects a student, the inner key selects an attribute.
    val studentData = Map(
      "Alice" -> Map("age" -> 20, "grade" -> 95, "year" -> 3),
      "Bob" -> Map("age" -> 19, "grade" -> 87, "year" -> 2)
    )
    val aliceAge = studentData("Alice")("age")
    println(s"Alice's age: $aliceAge")

    // Mutable map: insert a new entry, overwrite an existing one, delete one, all in place.
    val live = mutable.Map("Alice" -> 95, "Bob" -> 87)
    live.update("Charlie", 92)
    live.update("Alice", 98)
    live -= "Bob"
    println(s"Mutable scores: $live")
  }
}

Collection Conversions

Conversions Between Collections

/** Demonstrates the `toX` conversions between lists, arrays, sets, maps, strings and ranges. */
object CollectionConversions {
  def main(args: Array[String]): Unit = {
    // Prints a single `label: value` line.
    def show(label: String, value: Any): Unit = println(s"$label: $value")

    val numbers = List(1, 2, 3, 4, 5, 2, 3)
    val array = Array(1, 2, 3, 4, 5)
    val unique = Set(1, 2, 3, 4, 5)
    val lookup = Map("a" -> 1, "b" -> 2, "c" -> 3)

    // Arrays are joined with mkString before printing rather than interpolated directly.
    println("From List:")
    show("List to Vector", numbers.toVector)
    show("List to Set", numbers.toSet) // the repeated 2 and 3 collapse
    show("List to Array", numbers.toArray.mkString(", "))

    println("\nFrom Array:")
    show("Array to List", array.toList)
    show("Array to Vector", array.toVector)
    show("Array to Set", array.toSet)

    println("\nFrom Set:")
    show("Set to List", unique.toList)
    show("Set to Vector", unique.toVector)
    show("Set to Array", unique.toArray.mkString(", "))

    println("\nFrom Map:")
    show("Map to List", lookup.toList)
    show("Map keys to Set", lookup.keySet)
    show("Map values to List", lookup.values.toList)

    // A String converts like a sequence of its characters.
    val text = "hello"
    show("\nString to List", text.toList)
    show("String to Vector", text.toVector)
    show("String to Set", text.toSet)

    // A Range is materialised into a concrete collection on conversion.
    val oneToTen = 1 to 10
    show("\nRange to List", oneToTen.toList)
    show("Range to Vector", oneToTen.toVector)
    show("Range to Set", oneToTen.toSet)
  }
}

Collection Performance Comparison

Performance Characteristics

/** Prints single-run timings for creating, indexing, extending and summing List, Vector and Array. */
object CollectionPerformance {
  /**
   * Evaluates `operation` once and prints how long it took.
   *
   * The printed line has `name` right-aligned in 20 columns, followed by the elapsed time in
   * whole milliseconds (the nanosecond difference integer-divided by 1,000,000).
   *
   * @param name      label for the printed line
   * @param operation by-name expression to time
   * @return the value produced by `operation`
   */
  private def timed[T](name: String)(operation: => T): T = {
    val startedAt = System.nanoTime()
    val result = operation
    val elapsedMs = (System.nanoTime() - startedAt) / 1000000
    println(f"$name%20s: $elapsedMs%6d ms")
    result
  }

  def main(args: Array[String]): Unit = {
    val size = 100000

    println("Collection Performance Comparison:")
    println("=" * 50)

    // Build the three sequences; the results are reused by every later measurement.
    println("\nCreation Performance:")
    val asList = timed("List creation")(List.range(1, size))
    val asVector = timed("Vector creation")(Vector.range(1, size))
    val asArray = timed("Array creation")(Array.range(1, size))

    // Look up the element in the middle of each sequence.
    println("\nRandom Access Performance:")
    val middle = size / 2
    timed("List access")(asList(middle))
    timed("Vector access")(asVector(middle))
    timed("Array access")(asArray(middle))

    // Add one element at the front.
    println("\nPrepend Performance:")
    timed("List prepend")(0 :: asList)
    timed("Vector prepend")(0 +: asVector)

    // Add one element at the back.
    println("\nAppend Performance:")
    timed("List append")(asList :+ (size + 1))
    timed("Vector append")(asVector :+ (size + 1))

    // Full traversal via sum.
    println("\nIteration Performance:")
    timed("List sum")(asList.sum)
    timed("Vector sum")(asVector.sum)
    timed("Array sum")(asArray.sum)

    // Rule-of-thumb recommendations, printed verbatim.
    println("\nMemory and Performance Guidelines:")
    List(
      "List: Best for sequential access, prepending",
      "Vector: Best for random access, general purpose",
      "Array: Best for performance-critical code, interop with Java",
      "Set: Best for membership testing, uniqueness",
      "Map: Best for key-value lookups"
    ).foreach(println)
  }
}

Practical Application Examples

Data Processing Pipeline

/** Small analytics example: per-city aggregates, salary statistics and age buckets over a fixed roster. */
object DataProcessingPipeline {
  /** One record of the sample data set. */
  case class Person(name: String, age: Int, city: String, salary: Double)

  def main(args: Array[String]): Unit = {
    val people = List(
      Person("Alice", 25, "New York", 75000),
      Person("Bob", 30, "San Francisco", 95000),
      Person("Charlie", 35, "New York", 85000),
      Person("Diana", 28, "Boston", 70000),
      Person("Eve", 32, "San Francisco", 105000),
      Person("Frank", 29, "Boston", 68000)
    )

    println("Original data:")
    people.foreach(println)

    // Pipeline: keep people aged 28 or older, group them by city, then summarise each group.
    // Every metric is stored as a Double (the head count is converted explicitly) so the map is a
    // Map[String, Double] and each value can be formatted with %.2f below. Mixing an Int count with
    // Double averages would widen the value type to AnyVal, which the f interpolator rejects.
    val analysis: Map[String, Map[String, Double]] = people
      .filter(_.age >= 28)
      .groupBy(_.city)
      .view.mapValues { cityPeople =>
        val headCount = cityPeople.size
        val totalSalary = cityPeople.map(_.salary).sum
        Map[String, Double](
          "count" -> headCount.toDouble,
          "avgAge" -> cityPeople.map(_.age).sum.toDouble / headCount,
          "avgSalary" -> totalSalary / headCount,
          "totalSalary" -> totalSalary
        )
      }.toMap

    println("\nAnalysis by city (age >= 28):")
    analysis.foreach { case (city, stats) =>
      println(s"$city:")
      stats.foreach { case (metric, value) =>
        println(f"  $metric: $value%.2f")
      }
    }

    // Salary statistics over everyone (no age filter).
    val salaryStats = people.map(_.salary)
    val sortedSalaries = salaryStats.sorted
    val mid = sortedSalaries.length / 2
    // Even count: mean of the two middle values; odd count: the single middle value.
    val median =
      if (sortedSalaries.length % 2 == 0) (sortedSalaries(mid - 1) + sortedSalaries(mid)) / 2
      else sortedSalaries(mid)

    println("\nSalary Statistics:")
    println(f"Average: ${salaryStats.sum / salaryStats.length}%.2f")
    println(f"Median: $median%.2f")
    println(f"Min: ${salaryStats.min}%.2f")
    println(f"Max: ${salaryStats.max}%.2f")

    // Age distribution: under 30 is labelled "20s", under 40 "30s", everything else "40+".
    val ageGroups = people.groupBy { person =>
      person.age match {
        case age if age < 30 => "20s"
        case age if age < 40 => "30s"
        case _ => "40+"
      }
    }

    println("\nAge Distribution:")
    // `members` is named distinctly so it does not shadow the outer `people` list.
    ageGroups.foreach { case (group, members) =>
      println(s"$group: ${members.map(_.name).mkString(", ")}")
    }
  }
}

Cache System

import scala.collection.mutable

/** A small least-recently-used cache built on `mutable.LinkedHashMap`, plus a demo driver. */
object CacheSystem {
  /**
   * Bounded key/value cache that drops its first (oldest) entry once it holds more than
   * `maxSize` entries. Recency is tracked by position in the backing map: every hit or write
   * removes the key and inserts it again, which places it last.
   *
   * @param maxSize number of entries kept after a `put` completes
   * @tparam K key type
   * @tparam V value type
   */
  class LRUCache[K, V](maxSize: Int) {
    private val cache = mutable.LinkedHashMap[K, V]()

    /**
     * Looks up `key`; on a hit the entry is re-inserted so it becomes the newest one.
     *
     * @return `Some(value)` when the key is cached, `None` otherwise
     */
    def get(key: K): Option[V] =
      cache.remove(key).map { value =>
        cache.put(key, value)
        value
      }

    /** Stores `value` under `key` as the newest entry, then evicts the oldest entry if over capacity. */
    def put(key: K, value: V): Unit = {
      // Removing first guarantees an existing key is re-inserted at the end instead of keeping its old position.
      cache -= key
      cache += (key -> value)

      if (cache.size > maxSize) {
        val (eldest, _) = cache.head
        cache -= eldest
      }
    }

    /** Number of entries currently cached. */
    def size: Int = cache.size

    /** Immutable snapshot of the cached keys. */
    def keys: Set[K] = cache.keySet.toSet

    override def toString: String = cache.toString()
  }

  def main(args: Array[String]): Unit = {
    val cache = new LRUCache[String, Int](3)

    // Fill the cache to capacity.
    List("a" -> 1, "b" -> 2, "c" -> 3).foreach { case (key, value) => cache.put(key, value) }
    println(s"After adding a, b, c: $cache")

    // Reading 'a' makes it the most recently used entry.
    println(s"Get 'a': ${cache.get("a")}")
    println(s"After accessing 'a': $cache")

    // One more insert exceeds the capacity, so the oldest entry ('b') is evicted.
    cache.put("d", 4)
    println(s"After adding 'd': $cache")

    // Replay a request stream; every miss loads the key into the cache before the next request.
    val requests = List("a", "b", "c", "d", "a", "e", "f", "a")
    val (hits, misses) = requests.foldLeft((0, 0)) { case ((hitCount, missCount), key) =>
      cache.get(key) match {
        case Some(_) => (hitCount + 1, missCount)
        case None =>
          cache.put(key, key.hashCode)
          (hitCount, missCount + 1)
      }
    }

    println("\nCache performance:")
    println(s"Hits: $hits, Misses: $misses")
    println(s"Hit rate: ${hits.toDouble / (hits + misses) * 100}%")
  }
}

Best Practices

  1. Choose the right collection type:

    • Need sequential access: List
    • Need random access: Vector
    • Need uniqueness: Set
    • Need key-value mapping: Map
  2. Immutable vs Mutable:

    • Use immutable collections by default
    • Consider mutable collections for performance-critical scenarios
    • Prioritize immutable for functional programming
  3. Performance considerations:

    • List suitable for prepend operations
    • Vector suitable for random access
    • Array suitable for performance-critical scenarios
  4. Memory efficiency:

    • Use view for lazy evaluation
    • Avoid unnecessary intermediate collections
    • Consider using Iterator to process large data
  5. Functional programming:

    • Use map, filter, reduce and other higher-order functions
    • Chain operations for readability
    • Avoid side effects

The Scala collection framework provides powerful and flexible data processing capabilities; mastering these collection types and their operations is key to writing efficient Scala programs.