Skip to content

Scala Collections

The Scala collection framework is one of the language's most powerful features, providing rich data structures and operation methods. Collections are divided into two major categories: mutable and immutable.

Collection Hierarchy

Collection Types Overview

scala
/** Side-by-side tour of the default immutable collections and their
  * `scala.collection.mutable` counterparts.
  */
object CollectionOverview {
  def main(args: Array[String]): Unit = {
    import scala.collection.mutable

    // Prints every (label, collection) pair on its own line as "label: collection".
    def show(entries: Seq[(String, Any)]): Unit =
      entries.foreach { case (label, coll) => println(s"$label: $coll") }

    // The unqualified List/Vector/Set/Map names refer to the immutable variants.
    show(Seq(
      "List" -> List(1, 2, 3, 4, 5),
      "Vector" -> Vector(1, 2, 3, 4, 5),
      "Set" -> Set(1, 2, 3, 4, 5),
      "Map" -> Map("a" -> 1, "b" -> 2, "c" -> 3)
    ))

    // Mutable variants are reached through the qualified `mutable` package.
    val buffer = mutable.ListBuffer(1, 2, 3)
    val numbers = mutable.Set(1, 2, 3)
    val lookup = mutable.Map("a" -> 1, "b" -> 2)
    val mutableEntries: Seq[(String, Any)] = Seq(
      "Mutable List" -> buffer,
      "Mutable Set" -> numbers,
      "Mutable Map" -> lookup
    )
    show(mutableEntries)

    // In-place updates: the same instances are printed again afterwards.
    buffer += 4
    numbers += 4
    lookup("d") = 4

    println("After modification:")
    show(mutableEntries)
  }
}

List

List Basic Operations

scala
/** Walks through building, inspecting, extending and slicing an immutable `List`. */
object ListOperations {
  def main(args: Array[String]): Unit = {
    val list1 = List(1, 2, 3, 4, 5)

    // Five ways to build a list: literal, cons cells, range, fill and tabulate.
    val constructed: List[List[Int]] = List(
      list1,
      1 :: 2 :: 3 :: 4 :: 5 :: Nil,
      List.range(1, 6), // upper bound is exclusive
      List.fill(5)(0),
      List.tabulate(5)(i => i * i)
    )
    constructed.zipWithIndex.foreach { case (xs, idx) =>
      println(s"list${idx + 1}: $xs")
    }

    // Prints each pair as "label: value", in the order given.
    def report(rows: (String, Any)*): Unit =
      rows.foreach { case (label, value) => println(s"$label: $value") }

    // Structural accessors.
    report(
      "Head" -> list1.head,
      "Tail" -> list1.tail,
      "Last" -> list1.last,
      "Init" -> list1.init,
      "Length" -> list1.length,
      "Is empty" -> list1.isEmpty
    )

    // Each of these returns a new list; list1 itself is never modified.
    report(
      "Prepend 0" -> (0 :: list1),
      "Append 6" -> (list1 :+ 6),
      "Concatenate" -> (list1 ++ List(6, 7, 8))
    )

    // Positional access and sub-lists.
    report(
      "Element at index 2" -> list1(2),
      "Take 3" -> list1.take(3),
      "Drop 2" -> list1.drop(2),
      "Slice(1, 4)" -> list1.slice(1, 4)
    )
  }
}

List Higher-Order Functions

scala
/** Demonstrates the common higher-order operations on `List`: transforming,
  * filtering, flattening, aggregating, scanning, grouping and sorting.
  */
object ListHigherOrderFunctions {
  def main(args: Array[String]): Unit = {
    val numbers = (1 to 10).toList
    val words = List("scala", "java", "python", "javascript")
    val isEven = (n: Int) => n % 2 == 0

    // Transform: one output element per input element.
    val doubled = for (n <- numbers) yield n * 2
    val lengths = for (w <- words) yield w.length
    println(s"Doubled: $doubled")
    println(s"Word lengths: $lengths")

    // Filter: keep only the elements satisfying a predicate.
    val evens = for (n <- numbers if isEven(n)) yield n
    val longWords = for (w <- words if w.length > 4) yield w
    println(s"Even numbers: $evens")
    println(s"Long words: $longWords")

    // Flatten-and-map: every input contributes zero or more outputs.
    val chars = words.mkString.toList
    val pairs = for (n <- numbers; v <- List(n, n * 10)) yield v
    println(s"All characters: $chars")
    println(s"Pairs: $pairs")

    // Aggregations that collapse the list to a single value.
    println(s"Sum: ${numbers.sum}")
    println(s"Product: ${numbers.product}")
    println(s"Max: ${numbers.max}")

    // Scans keep every intermediate accumulator, starting with the seed.
    val runningSum = numbers.scanLeft(0)((acc, n) => acc + n)
    val runningProduct = numbers.scanLeft(1)((acc, n) => acc * n)
    println(s"Running sum: $runningSum")
    println(s"Running product: $runningProduct")

    // Splitting by predicate and bucketing by key.
    val odds = numbers.filterNot(isEven)
    val grouped = numbers.groupBy(n => n % 3)
    println(s"Evens: $evens, Odds: $odds")
    println(s"Grouped by remainder: $grouped")

    // Natural order, reversed order, and ordering by a derived key.
    val shuffled = List(5, 2, 8, 1, 9, 3)
    println(s"Sorted: ${shuffled.sorted}")
    println(s"Sorted descending: ${shuffled.sorted(Ordering[Int].reverse)}")
    println(s"Sort by length: ${words.sortWith(_.length < _.length)}")
  }
}

Vector

Vector Characteristics

scala
/** Shows how to build and update a `Vector`, then prints rough single-shot
  * timings of indexed access and prepend for `List` versus `Vector`.
  */
object VectorOperations {
  def main(args: Array[String]): Unit = {
    val vector1 = Vector(1, 2, 3, 4, 5)

    // Literal, range (exclusive upper bound) and fill constructors.
    Seq(vector1, Vector.range(1, 6), Vector.fill(5)(0)).zipWithIndex.foreach {
      case (v, idx) => println(s"vector${idx + 1}: $v")
    }

    // Indexed lookup deep inside a large vector.
    val largeVector = Vector.range(1, 1000000)
    println(s"Element at 500000: ${largeVector(500000)}")

    // `updated` returns a modified copy; vector1 is left untouched.
    val patched = vector1.updated(2, 99)
    println(s"Original: $vector1")
    println(s"Updated: $patched")

    // Vectors accept new elements at either end.
    println(s"Prepended: ${0 +: vector1}")
    println(s"Appended: ${vector1 :+ 6}")

    // Wall-clock nanoseconds taken by one evaluation of `operation`.
    // The result is discarded, and there is no warm-up or repetition.
    def elapsedNanos[T](operation: => T): Long = {
      val startedAt = System.nanoTime()
      operation
      System.nanoTime() - startedAt
    }

    // Prints each timing as "label: <nanos>ns".
    def printTimings(timings: (String, Long)*): Unit =
      timings.foreach { case (label, nanos) => println(s"$label: ${nanos}ns") }

    val size = 100000
    val list = List.range(1, size)
    val vector = Vector.range(1, size)
    val middle = size / 2

    // Indexed access to the middle element.
    val listAccess = elapsedNanos(list(middle))
    val vectorAccess = elapsedNanos(vector(middle))
    printTimings("List random access" -> listAccess, "Vector random access" -> vectorAccess)

    // Adding one element at the front.
    val listPrepend = elapsedNanos(0 :: list)
    val vectorPrepend = elapsedNanos(0 +: vector)
    printTimings("List prepend" -> listPrepend, "Vector prepend" -> vectorPrepend)
  }
}

Set

Set Operations

scala
/** Demonstrates immutable `Set` construction, membership tests, element
  * addition/removal, set algebra, subset checks, and the mutable and sorted
  * variants.
  */
object SetOperations {
  def main(args: Array[String]): Unit = {
    // Create Set
    val set1 = Set(1, 2, 3, 4, 5)
    val set2 = Set(4, 5, 6, 7, 8)
    val set3 = Set(1, 1, 2, 2, 3, 3)  // Duplicates collapse to a single element

    println(s"set1: $set1")
    println(s"set2: $set2")
    println(s"set3 (duplicates removed): $set3")

    // Basic operations
    println(s"Contains 3: ${set1.contains(3)}")
    println(s"Size: ${set1.size}")
    println(s"Is empty: ${set1.isEmpty}")

    // Add and delete elements; every operation returns a new set
    val added = set1 + 6
    val removed = set1 - 3
    val multipleAdded = set1 ++ Set(6, 7, 8)
    val multipleRemoved = set1 -- Set(1, 2)

    println(s"Added 6: $added")
    println(s"Removed 3: $removed")
    println(s"Multiple added: $multipleAdded")
    println(s"Multiple removed: $multipleRemoved")

    // Set algebra
    val union = set1 union set2  // or set1 | set2
    val intersection = set1 intersect set2  // or set1 & set2
    val difference = set1 diff set2  // or set1 &~ set2

    println(s"Union: $union")
    println(s"Intersection: $intersection")
    println(s"Difference: $difference")

    // Subset and superset
    val subset = Set(1, 2, 3)
    println(s"$subset is subset of $set1: ${subset.subsetOf(set1)}")
    // Set has no `supersetOf`; "set1 is a superset of subset" is the same
    // statement as "subset is a subset of set1".
    println(s"$set1 is superset of $subset: ${subset.subsetOf(set1)}")

    // Mutable Set: += and -= modify the set in place
    import scala.collection.mutable
    val mutableSet = mutable.Set(1, 2, 3)
    mutableSet += 4
    mutableSet -= 1
    println(s"Mutable set: $mutableSet")

    // SortedSet keeps its elements in ascending order
    import scala.collection.immutable.SortedSet
    val sortedSet = SortedSet(5, 1, 3, 2, 4)
    println(s"Sorted set: $sortedSet")
  }
}

Map

Map Basic Operations

scala
/** Covers the everyday immutable `Map` API: construction, lookup, membership,
  * adding/removing entries, key/value views, and transforming entries.
  */
object MapOperations {
  def main(args: Array[String]): Unit = {
    // Three ways to obtain a map: arrow pairs, zipped keys/values, and empty.
    val map1 = Map("a" -> 1, "b" -> 2, "c" -> 3)
    val map2 = List("x", "y", "z").zip(List(10, 20, 30)).toMap
    val map3 = Map.empty[String, Int]
    List(map1, map2, map3).zipWithIndex.foreach { case (m, idx) =>
      println(s"map${idx + 1}: $m")
    }

    // Lookup: apply throws on a missing key, get returns an Option,
    // getOrElse substitutes a default.
    val direct = map1("a")
    val present = map1.get("a")
    val absent = map1.get("d")
    val defaulted = map1.getOrElse("d", 0)
    println(s"Value for 'a': $direct")
    println(s"Get 'a': $present")
    println(s"Get 'd': $absent")
    println(s"Get 'd' with default: $defaulted")

    // Key membership.
    for (key <- List("b", "d")) println(s"Contains '$key': ${map1.contains(key)}")

    // Every "modification" yields a new map; map1 stays as it was.
    println(s"Updated: ${map1.updated("d", 4)}")
    println(s"Multiple updated: ${map1 ++ Map("d" -> 4, "e" -> 5)}")
    println(s"Removed: ${map1 - "a"}")
    println(s"Multiple removed: ${map1 -- List("a", "b")}")

    // Views over keys, values and entries.
    println(s"Keys: ${map1.keys}")
    println(s"Values: ${map1.values}")
    println(s"Key-value pairs: ${map1.toList}")

    // Entry-wise transformation and filtering.
    val doubled = for ((key, value) <- map1) yield key -> (value * 2)
    val filtered = for ((key, value) <- map1 if value > 1) yield key -> value
    println(s"Doubled values: $doubled")
    println(s"Filtered (value > 1): $filtered")
  }
}

Map Advanced Operations

scala
/** Works through a small grade book: extremes and average, bucketing by letter
  * grade, merging in bonus points, nested maps, and in-place edits on a mutable map.
  */
object AdvancedMapOperations {
  def main(args: Array[String]): Unit = {
    val scores = Map(
      "Alice" -> 95,
      "Bob" -> 87,
      "Charlie" -> 92,
      "Diana" -> 78
    )

    // Extremes are found by comparing the score half of each entry.
    val topStudent = scores.maxBy { case (_, score) => score }
    val bottomStudent = scores.minBy { case (_, score) => score }
    val averageScore = scores.values.sum.toDouble / scores.size
    println(s"Top student: $topStudent")
    println(s"Bottom student: $bottomStudent")
    println(s"Average score: $averageScore")

    // Letter grade for a numeric score: 90+ is A, 80+ is B, 70+ is C, otherwise F.
    def letterGrade(score: Int): String =
      if (score >= 90) "A"
      else if (score >= 80) "B"
      else if (score >= 70) "C"
      else "F"

    val gradeRanges = scores.groupBy { case (_, score) => letterGrade(score) }
    println("Grade distribution:")
    for ((grade, students) <- gradeRanges)
      println(s"Grade $grade: ${students.keys.mkString(", ")}")

    // Bonus is added to the existing score; unknown names start from 0.
    val bonusPoints = Map("Alice" -> 5, "Bob" -> 3, "Eve" -> 10)
    val boosted = for ((name, bonus) <- bonusPoints)
      yield name -> (scores.getOrElse(name, 0) + bonus)
    val finalScores = scores ++ boosted
    println(s"Final scores: $finalScores")

    // A map whose values are themselves maps.
    val studentData = Map(
      "Alice" -> Map("age" -> 20, "grade" -> 95, "year" -> 3),
      "Bob" -> Map("age" -> 19, "grade" -> 87, "year" -> 2)
    )
    val aliceAge = studentData("Alice")("age")
    println(s"Alice's age: $aliceAge")

    // The mutable variant is edited in place.
    import scala.collection.mutable
    val mutableScores = mutable.Map("Alice" -> 95, "Bob" -> 87)
    mutableScores += ("Charlie" -> 92)  // insert a new entry
    mutableScores.update("Alice", 98)   // overwrite an existing entry
    mutableScores -= "Bob"              // drop an entry
    println(s"Mutable scores: $mutableScores")
  }
}

Collection Conversions

Conversions Between Collections

scala
/** Prints the result of converting between List, Array, Set, Map, String and
  * Range using the standard `toXxx` methods.
  */
object CollectionConversions {
  def main(args: Array[String]): Unit = {
    val list = List(1, 2, 3, 4, 5, 2, 3)
    val array = Array(1, 2, 3, 4, 5)
    val set = Set(1, 2, 3, 4, 5)
    val map = Map("a" -> 1, "b" -> 2, "c" -> 3)

    // Prints each pair as "label: value", in the order given.
    def show(rows: (String, Any)*): Unit =
      rows.foreach { case (label, value) => println(s"$label: $value") }

    // Prints a heading line followed by its rows.
    def section(heading: String)(rows: (String, Any)*): Unit = {
      println(heading)
      show(rows: _*)
    }

    section("From List:")(
      "List to Vector" -> list.toVector,
      "List to Set" -> list.toSet, // repeated elements collapse
      "List to Array" -> list.toArray.mkString(", ")
    )

    section("\nFrom Array:")(
      "Array to List" -> array.toList,
      "Array to Vector" -> array.toVector,
      "Array to Set" -> array.toSet
    )

    section("\nFrom Set:")(
      "Set to List" -> set.toList,
      "Set to Vector" -> set.toVector,
      "Set to Array" -> set.toArray.mkString(", ")
    )

    section("\nFrom Map:")(
      "Map to List" -> map.toList,
      "Map keys to Set" -> map.keySet,
      "Map values to List" -> map.values.toList
    )

    // A String converts as a sequence of its characters.
    // The leading "\n" in the first label produces the blank separator line.
    val string = "hello"
    show(
      "\nString to List" -> string.toList,
      "String to Vector" -> string.toVector,
      "String to Set" -> string.toSet
    )

    // An inclusive Range materialises into concrete collections.
    val range = 1 to 10
    show(
      "\nRange to List" -> range.toList,
      "Range to Vector" -> range.toVector,
      "Range to Set" -> range.toSet
    )
  }
}

Collection Performance Comparison

Performance Characteristics

scala
/** Prints coarse, single-shot millisecond timings for creating, indexing,
  * prepending to, appending to and summing List, Vector and Array, followed by
  * rule-of-thumb guidance on which collection to choose.
  */
object CollectionPerformance {
  def main(args: Array[String]): Unit = {
    val size = 100000

    // Evaluates `operation` once, prints its elapsed time in whole milliseconds
    // under a right-aligned 20-character label, and returns the result.
    def timeOperation[T](name: String)(operation: => T): T = {
      val startedAt = System.nanoTime()
      val outcome = operation
      val elapsedMillis = (System.nanoTime() - startedAt) / 1000000
      println(f"$name%20s: $elapsedMillis%6d ms")
      outcome
    }

    // Prints a heading and then times each labelled thunk in order.
    def benchmark(heading: String)(cases: (String, () => Any)*): Unit = {
      println(heading)
      cases.foreach { case (label, body) => timeOperation(label)(body()) }
    }

    println("Collection Performance Comparison:")
    println("=" * 50)

    // Construction is timed directly because the results feed the later sections.
    println("\nCreation Performance:")
    val list = timeOperation("List creation")(List.range(1, size))
    val vector = timeOperation("Vector creation")(Vector.range(1, size))
    val array = timeOperation("Array creation")(Array.range(1, size))

    val index = size / 2
    benchmark("\nRandom Access Performance:")(
      "List access" -> (() => list(index)),
      "Vector access" -> (() => vector(index)),
      "Array access" -> (() => array(index))
    )

    benchmark("\nPrepend Performance:")(
      "List prepend" -> (() => 0 :: list),
      "Vector prepend" -> (() => 0 +: vector)
    )

    benchmark("\nAppend Performance:")(
      "List append" -> (() => list :+ (size + 1)),
      "Vector append" -> (() => vector :+ (size + 1))
    )

    benchmark("\nIteration Performance:")(
      "List sum" -> (() => list.sum),
      "Vector sum" -> (() => vector.sum),
      "Array sum" -> (() => array.sum)
    )

    // Closing guidance, one line per collection type.
    println("\nMemory and Performance Guidelines:")
    List(
      "List: Best for sequential access, prepending",
      "Vector: Best for random access, general purpose",
      "Array: Best for performance-critical code, interop with Java",
      "Set: Best for membership testing, uniqueness",
      "Map: Best for key-value lookups"
    ).foreach(println)
  }
}

Practical Application Examples

Data Processing Pipeline

scala
/** Runs a small analytics pipeline over an in-memory list of people:
  * per-city statistics for everyone aged 28 or older, overall salary
  * statistics, and a breakdown by age bracket.
  */
object DataProcessingPipeline {
  case class Person(name: String, age: Int, city: String, salary: Double)

  def main(args: Array[String]): Unit = {
    val people = List(
      Person("Alice", 25, "New York", 75000),
      Person("Bob", 30, "San Francisco", 95000),
      Person("Charlie", 35, "New York", 85000),
      Person("Diana", 28, "Boston", 70000),
      Person("Eve", 32, "San Francisco", 105000),
      Person("Frank", 29, "Boston", 68000)
    )

    println("Original data:")
    people.foreach(println)

    // Filter by age, group by city, then summarise each city.
    // Every statistic is stored as a Double so the map is a Map[String, Double];
    // mixing in an Int would widen the value type to AnyVal, which the %.2f
    // format below cannot accept.
    val analysis: Map[String, Map[String, Double]] = people
      .filter(_.age >= 28)
      .groupBy(_.city)
      .view.mapValues { cityPeople =>
        val headcount = cityPeople.size
        val totalSalary = cityPeople.map(_.salary).sum
        Map(
          "count" -> headcount.toDouble,
          "avgAge" -> cityPeople.map(_.age).sum.toDouble / headcount,
          "avgSalary" -> totalSalary / headcount,
          "totalSalary" -> totalSalary
        )
      }.toMap

    println("\nAnalysis by city (age >= 28):")
    analysis.foreach { case (city, stats) =>
      println(s"$city:")
      stats.foreach { case (metric, value) =>
        println(f"  $metric: $value%.2f")
      }
    }

    // Salary statistics over everyone, regardless of age.
    val salaryStats = people.map(_.salary)
    val sortedSalaries = salaryStats.sorted
    val mid = sortedSalaries.length / 2
    // Even count: mean of the two middle values; odd count: the middle value.
    val median =
      if (sortedSalaries.length % 2 == 0) (sortedSalaries(mid - 1) + sortedSalaries(mid)) / 2
      else sortedSalaries(mid)

    println(f"\nSalary Statistics:")
    println(f"Average: ${salaryStats.sum / salaryStats.length}%.2f")
    println(f"Median: $median%.2f")
    println(f"Min: ${salaryStats.min}%.2f")
    println(f"Max: ${salaryStats.max}%.2f")

    // Age distribution by decade bracket.
    val ageGroups = people.groupBy { person =>
      person.age match {
        case age if age < 30 => "20s"
        case age if age < 40 => "30s"
        case _ => "40+"
      }
    }

    println("\nAge Distribution:")
    // `members` avoids shadowing the outer `people` list.
    ageGroups.foreach { case (group, members) =>
      println(s"$group: ${members.map(_.name).mkString(", ")}")
    }
  }
}

Cache System

scala
import scala.collection.mutable

/** A small least-recently-used cache built on `mutable.LinkedHashMap`, plus a
  * demo that measures its hit rate over a fixed request sequence.
  */
object CacheSystem {
  /** Cache holding at most `maxSize` entries. Entries are kept in insertion
    * order, and every read or write re-inserts the key, so the first entry is
    * always the least recently used one.
    */
  class LRUCache[K, V](maxSize: Int) {
    private val entries = mutable.LinkedHashMap[K, V]()

    /** Looks up `key`; a hit also marks the entry as most recently used. */
    def get(key: K): Option[V] = {
      val found = entries.remove(key)
      // Re-inserting after removal moves the entry to the end of the order.
      found.foreach(value => entries.update(key, value))
      found
    }

    /** Stores `value` under `key` as the most recently used entry, evicting
      * the least recently used entry if the cache has grown past `maxSize`.
      */
    def put(key: K, value: V): Unit = {
      entries -= key // drop any previous position so the key lands at the end
      entries += (key -> value)
      if (entries.size > maxSize) entries -= entries.head._1
    }

    def size: Int = entries.size
    def keys: Set[K] = entries.keys.toSet

    override def toString: String = entries.toString
  }

  def main(args: Array[String]): Unit = {
    val cache = new LRUCache[String, Int](3)

    // Fill the cache to capacity.
    List("a" -> 1, "b" -> 2, "c" -> 3).foreach { case (key, value) => cache.put(key, value) }
    println(s"After adding a, b, c: $cache")

    // Reading 'a' makes it the most recently used entry.
    println(s"Get 'a': ${cache.get("a")}")
    println(s"After accessing 'a': $cache")

    // One entry over capacity: 'b' is now the oldest and is evicted.
    cache.put("d", 4)
    println(s"After adding 'd': $cache")

    // Replay a request sequence; every miss loads the key into the cache.
    val requests = List("a", "b", "c", "d", "a", "e", "f", "a")
    val outcomes = requests.map { key =>
      val hit = cache.get(key).isDefined
      if (!hit) cache.put(key, key.hashCode)
      hit
    }
    val hits = outcomes.count(identity)
    val misses = outcomes.length - hits

    println(s"\nCache performance:")
    println(s"Hits: $hits, Misses: $misses")
    println(s"Hit rate: ${hits.toDouble / (hits + misses) * 100}%")
  }
}

Best Practices

  1. Choose the right collection type:

    • Need sequential access: List
    • Need random access: Vector
    • Need uniqueness: Set
    • Need key-value mapping: Map
  2. Immutable vs Mutable:

    • Use immutable collections by default
    • Consider mutable collections for performance-critical scenarios
    • Prioritize immutable for functional programming
  3. Performance considerations:

    • List suitable for prepend operations
    • Vector suitable for random access
    • Array suitable for performance-critical scenarios
  4. Memory efficiency:

    • Use view for lazy evaluation
    • Avoid unnecessary intermediate collections
    • Consider using Iterator to process large data
  5. Functional programming:

    • Use map, filter, reduce and other higher-order functions
    • Chain operations for readability
    • Avoid side effects

The Scala collection framework provides powerful and flexible data processing capabilities. Mastering these collection types and their operations is key to writing efficient Scala programs.

Content is for learning and research only.