Scala Collections
The Scala collection framework is one of the language's most powerful features, providing rich data structures and operation methods. Collections are divided into two major categories: mutable and immutable.
Collection Hierarchy
Collection Types Overview
scala
object CollectionOverview {
// Entry point: prints one sample of each immutable collection, then builds the
// mutable counterparts, adds one element to each, and prints them before and after.
def main(args: Array[String]): Unit = {
  // Unqualified collection names give the immutable variants.
  val numberList = List(1, 2, 3, 4, 5)
  val numberVector = Vector(1, 2, 3, 4, 5)
  val numberSet = Set(1, 2, 3, 4, 5)
  val letterCodes = Map("a" -> 1, "b" -> 2, "c" -> 3)

  println(s"List: $numberList")
  println(s"Vector: $numberVector")
  println(s"Set: $numberSet")
  println(s"Map: $letterCodes")

  // Mutable variants are reached through the qualified `mutable` prefix.
  import scala.collection.mutable
  val buffer = mutable.ListBuffer(1, 2, 3)
  val growingSet = mutable.Set(1, 2, 3)
  val growingMap = mutable.Map("a" -> 1, "b" -> 2)

  // Prints the current state of the three mutable collections; used for both
  // the "before" and the "after" snapshot.
  def printMutableState(): Unit = {
    println(s"Mutable List: $buffer")
    println(s"Mutable Set: $growingSet")
    println(s"Mutable Map: $growingMap")
  }

  printMutableState()

  // In-place updates: each call changes the existing instance.
  buffer.append(4)
  growingSet.add(4)
  growingMap.update("d", 4)

  println("After modification:")
  printMutableState()
}
}

List (List)
List Basic Operations
scala
object ListOperations {
// Entry point: walks through List construction, basic accessors, adding
// elements, and positional access, printing the result of every step.
def main(args: Array[String]): Unit = {
  // Five ways to build a List.
  val fromLiteral = List(1, 2, 3, 4, 5)
  val fromCons = 1 :: 2 :: 3 :: 4 :: 5 :: Nil
  val fromRange = List.range(1, 6) // the upper bound is exclusive
  val zeros = List.fill(5)(0)
  val squares = List.tabulate(5)(index => index * index)

  println(s"list1: $fromLiteral")
  println(s"list2: $fromCons")
  println(s"list3: $fromRange")
  println(s"list4: $zeros")
  println(s"list5: $squares")

  // Accessors on the first list.
  println(s"Head: ${fromLiteral.head}")
  println(s"Tail: ${fromLiteral.tail}")
  println(s"Last: ${fromLiteral.last}")
  println(s"Init: ${fromLiteral.init}")
  println(s"Length: ${fromLiteral.length}")
  println(s"Is empty: ${fromLiteral.isEmpty}")

  // Every "add" returns a new list; the first list itself is never changed.
  val withZeroInFront = 0 +: fromLiteral
  val withSixAtEnd = fromLiteral :+ 6
  val withMoreAtEnd = fromLiteral ::: List(6, 7, 8)

  println(s"Prepend 0: $withZeroInFront")
  println(s"Append 6: $withSixAtEnd")
  println(s"Concatenate: $withMoreAtEnd")

  // Positional access and sub-lists.
  println(s"Element at index 2: ${fromLiteral.apply(2)}")
  println(s"Take 3: ${fromLiteral.take(3)}")
  println(s"Drop 2: ${fromLiteral.drop(2)}")
  println(s"Slice(1, 4): ${fromLiteral.slice(1, 4)}")
}
}

List Higher-Order Functions
scala
object ListHigherOrderFunctions {
// Entry point: demonstrates the common higher-order List operations
// (map, filter, flatMap, reduce/fold, scan, partition/groupBy, sorting)
// on a list of numbers and a list of words, printing each result.
def main(args: Array[String]): Unit = {
  val numbers = List.range(1, 11)
  val words = List("scala", "java", "python", "javascript")

  // map: one output element for every input element
  val doubled = numbers.map(n => n * 2)
  val lengths = words.map(word => word.length)
  println(s"Doubled: $doubled")
  println(s"Word lengths: $lengths")

  // filter: keep only the elements that satisfy the predicate
  val evens = numbers.filter(n => n % 2 == 0)
  val longWords = words.filter(word => word.length > 4)
  println(s"Even numbers: $evens")
  println(s"Long words: $longWords")

  // flatMap: map every element to a collection, then concatenate the results
  val chars = words.flatMap(word => word.toList)
  val pairs = numbers.flatMap(n => List(n, n * 10))
  println(s"All characters: $chars")
  println(s"Pairs: $pairs")

  // reduce / fold: collapse the list into a single value
  val sum = numbers.reduce((a, b) => a + b)
  val product = numbers.fold(1)((acc, n) => acc * n)
  val max = numbers.reduce((a, b) => a max b)
  println(s"Sum: $sum")
  println(s"Product: $product")
  println(s"Max: $max")

  // scanLeft: like a fold, but every intermediate accumulator is kept
  val runningSum = numbers.scanLeft(0)((acc, n) => acc + n)
  val runningProduct = numbers.scanLeft(1)((acc, n) => acc * n)
  println(s"Running sum: $runningSum")
  println(s"Running product: $runningProduct")

  // partition splits in two by a predicate; groupBy buckets by a computed key
  val (evenHalf, oddHalf) = numbers.partition(n => n % 2 == 0)
  val byRemainder = numbers.groupBy(n => n % 3)
  println(s"Evens: $evenHalf, Odds: $oddHalf")
  println(s"Grouped by remainder: $byRemainder")

  // Sorting: natural order, custom comparison, and by a derived key
  val shuffled = List(5, 2, 8, 1, 9, 3)
  println(s"Sorted: ${shuffled.sorted}")
  println(s"Sorted descending: ${shuffled.sortWith((a, b) => a > b)}")
  println(s"Sort by length: ${words.sortBy(word => word.length)}")
}
}

Vector (Vector)
Vector Characteristics
scala
object VectorOperations {
// Entry point: shows Vector creation, indexed access, non-destructive update,
// prepend/append, and a one-shot timing of List vs Vector for random access
// and prepend. The timings are single measurements in nanoseconds.
def main(args: Array[String]): Unit = {
  // Evaluates `operation` once and returns the elapsed System.nanoTime delta.
  def timeOperation[T](operation: => T): Long = {
    val startedAt = System.nanoTime()
    operation
    System.nanoTime() - startedAt
  }

  // Three ways to build a Vector.
  val fromLiteral = Vector(1, 2, 3, 4, 5)
  val fromRange = Vector.range(1, 6)
  val zeros = Vector.fill(5)(0)
  println(s"vector1: $fromLiteral")
  println(s"vector2: $fromRange")
  println(s"vector3: $zeros")

  // Indexed access into a large Vector.
  val largeVector = Vector.range(1, 1000000)
  val middleElement = largeVector(500000)
  println(s"Element at 500000: $middleElement")

  // `updated` returns a new Vector; the receiver is printed unchanged below.
  val withReplacement = fromLiteral.updated(2, 99)
  println(s"Original: $fromLiteral")
  println(s"Updated: $withReplacement")

  // Adding at either end also yields new Vectors.
  val withZeroInFront = 0 +: fromLiteral
  val withSixAtEnd = fromLiteral :+ 6
  println(s"Prepended: $withZeroInFront")
  println(s"Appended: $withSixAtEnd")

  // Same element range in both structures for the timing comparison.
  val sampleSize = 100000
  val sampleList = List.range(1, sampleSize)
  val sampleVector = Vector.range(1, sampleSize)
  val middleIndex = sampleSize / 2

  // Random access
  val listAccessNanos = timeOperation(sampleList(middleIndex))
  val vectorAccessNanos = timeOperation(sampleVector(middleIndex))
  println(s"List random access: ${listAccessNanos}ns")
  println(s"Vector random access: ${vectorAccessNanos}ns")

  // Prepend
  val listPrependNanos = timeOperation(0 :: sampleList)
  val vectorPrependNanos = timeOperation(0 +: sampleVector)
  println(s"List prepend: ${listPrependNanos}ns")
  println(s"Vector prepend: ${vectorPrependNanos}ns")
}
}

Set (Set)
Set Operations
scala
object SetOperations {
// Entry point: demonstrates Set creation, membership tests, adding/removing
// elements, set algebra (union / intersection / difference), subset and
// superset checks, and the mutable and sorted Set variants.
def main(args: Array[String]): Unit = {
  // Create Set
  val set1 = Set(1, 2, 3, 4, 5)
  val set2 = Set(4, 5, 6, 7, 8)
  val set3 = Set(1, 1, 2, 2, 3, 3) // Auto deduplication
  println(s"set1: $set1")
  println(s"set2: $set2")
  println(s"set3 (duplicates removed): $set3")

  // Basic operations
  println(s"Contains 3: ${set1.contains(3)}")
  println(s"Size: ${set1.size}")
  println(s"Is empty: ${set1.isEmpty}")

  // Add and delete elements (each returns a new Set)
  val added = set1 + 6
  val removed = set1 - 3
  val multipleAdded = set1 ++ Set(6, 7, 8)
  val multipleRemoved = set1 -- Set(1, 2)
  println(s"Added 6: $added")
  println(s"Removed 3: $removed")
  println(s"Multiple added: $multipleAdded")
  println(s"Multiple removed: $multipleRemoved")

  // Set operations
  val union = set1 union set2 // or set1 | set2
  val intersection = set1 intersect set2 // or set1 & set2
  val difference = set1 diff set2 // or set1 &~ set2
  println(s"Union: $union")
  println(s"Intersection: $intersection")
  println(s"Difference: $difference")

  // Subset and superset
  val subset = Set(1, 2, 3)
  println(s"$subset is subset of $set1: ${subset.subsetOf(set1)}")
  // "A is a superset of B" holds exactly when B is a subset of A, so the
  // check is made on `subset`, not on `set1`.
  println(s"$set1 is superset of $subset: ${subset.subsetOf(set1)}")

  // Different types of Set
  import scala.collection.mutable
  val mutableSet = mutable.Set(1, 2, 3)
  mutableSet += 4
  mutableSet -= 1
  println(s"Mutable set: $mutableSet")

  // SortedSet - ordered set
  import scala.collection.immutable.SortedSet
  val sortedSet = SortedSet(5, 1, 3, 2, 4)
  println(s"Sorted set: $sortedSet")
}
}

Map (Mapping)
Map Basic Operations
scala
object MapOperations {
// Entry point: demonstrates immutable Map creation, lookups (direct, optional,
// with default), key checks, adding/removing entries, key/value views, and
// map/filter transformations, printing the result of each step.
def main(args: Array[String]): Unit = {
  // Three ways to build a Map: arrow pairs, tuple pairs, and an empty typed Map.
  val letters = Map("a" -> 1, "b" -> 2, "c" -> 3)
  val coordinates = Map(("x", 10), ("y", 20), ("z", 30))
  val nothingYet = Map.empty[String, Int]
  println(s"map1: $letters")
  println(s"map2: $coordinates")
  println(s"map3: $nothingYet")

  // Lookups: `apply` yields the value, `get` yields an Option,
  // `getOrElse` substitutes a default for a missing key.
  println(s"Value for 'a': ${letters.apply("a")}")
  println(s"Get 'a': ${letters.get("a")}")
  println(s"Get 'd': ${letters.get("d")}")
  println(s"Get 'd' with default: ${letters.getOrElse("d", 0)}")

  // Key membership
  println(s"Contains 'b': ${letters.contains("b")}")
  println(s"Contains 'd': ${letters.contains("d")}")

  // Adding and removing entries; each operation returns a new Map.
  val withD = letters.updated("d", 4)
  val withDAndE = letters ++ Map("d" -> 4, "e" -> 5)
  val withoutA = letters - "a"
  val withoutAAndB = letters -- List("a", "b")
  println(s"Updated: $withD")
  println(s"Multiple updated: $withDAndE")
  println(s"Removed: $withoutA")
  println(s"Multiple removed: $withoutAAndB")

  // Key, value, and pair views
  println(s"Keys: ${letters.keys}")
  println(s"Values: ${letters.values}")
  println(s"Key-value pairs: ${letters.toList}")

  // Transformations over (key, value) pairs
  val doubledValues = letters.map { case (key, value) => (key, value * 2) }
  val aboveOne = letters.filter { case (_, value) => value > 1 }
  println(s"Doubled values: $doubledValues")
  println(s"Filtered (value > 1): $aboveOne")
}
}

Map Advanced Operations
scala
object AdvancedMapOperations {
// Entry point: works through a student-score Map — finding extremes and the
// mean, grouping by letter grade, merging bonus points, reading a nested Map,
// and finally editing a mutable Map in place.
def main(args: Array[String]): Unit = {
  val scores = Map("Alice" -> 95, "Bob" -> 87, "Charlie" -> 92, "Diana" -> 78)

  // Extremes and mean, ranked by the score component of each entry.
  val topStudent = scores.maxBy { case (_, score) => score }
  val bottomStudent = scores.minBy { case (_, score) => score }
  val averageScore = scores.values.sum.toDouble / scores.size
  println(s"Top student: $topStudent")
  println(s"Bottom student: $bottomStudent")
  println(s"Average score: $averageScore")

  // Letter grade for a score: 90+ is A, 80+ is B, 70+ is C, anything lower is F.
  def letterGrade(score: Int): String =
    if (score >= 90) "A"
    else if (score >= 80) "B"
    else if (score >= 70) "C"
    else "F"

  // Bucket the entries by letter grade.
  val gradeRanges = scores.groupBy { case (_, score) => letterGrade(score) }
  println("Grade distribution:")
  gradeRanges.foreach { case (grade, students) =>
    val names = students.keys.mkString(", ")
    println(s"Grade $grade: $names")
  }

  // Merge: every name in the bonus table gets (existing score or 0) + bonus,
  // and those adjusted entries override the originals.
  val bonusPoints = Map("Alice" -> 5, "Bob" -> 3, "Eve" -> 10)
  val adjusted = bonusPoints.map { case (name, bonus) =>
    (name, scores.getOrElse(name, 0) + bonus)
  }
  val finalScores = scores ++ adjusted
  println(s"Final scores: $finalScores")

  // Nested Map: student name -> (attribute name -> value)
  val studentData = Map(
    "Alice" -> Map("age" -> 20, "grade" -> 95, "year" -> 3),
    "Bob" -> Map("age" -> 19, "grade" -> 87, "year" -> 2)
  )
  val aliceAge = studentData("Alice")("age")
  println(s"Alice's age: $aliceAge")

  // Mutable Map: insert, overwrite, and delete in place.
  import scala.collection.mutable
  val liveScores = mutable.Map("Alice" -> 95, "Bob" -> 87)
  liveScores.update("Charlie", 92) // new key
  liveScores.update("Alice", 98) // existing key
  liveScores.remove("Bob")
  println(s"Mutable scores: $liveScores")
}
}

Collection Conversions
Conversions Between Collections
scala
object CollectionConversions {
// Entry point: converts between List, Array, Set, Map, String and Range using
// the `toXxx` methods and prints every result.
def main(args: Array[String]): Unit = {
  val numbers = List(1, 2, 3, 4, 5, 2, 3) // contains duplicates on purpose
  val numberArray = Array(1, 2, 3, 4, 5)
  val uniqueNumbers = Set(1, 2, 3, 4, 5)
  val letterCodes = Map("a" -> 1, "b" -> 2, "c" -> 3)

  // Starting from a List
  println("From List:")
  println(s"List to Vector: ${numbers.toVector}")
  println(s"List to Set: ${numbers.toSet}") // duplicates collapse here
  println(s"List to Array: ${numbers.toArray.mkString(", ")}")

  // Starting from an Array
  println("\nFrom Array:")
  println(s"Array to List: ${numberArray.toList}")
  println(s"Array to Vector: ${numberArray.toVector}")
  println(s"Array to Set: ${numberArray.toSet}")

  // Starting from a Set
  println("\nFrom Set:")
  println(s"Set to List: ${uniqueNumbers.toList}")
  println(s"Set to Vector: ${uniqueNumbers.toVector}")
  println(s"Set to Array: ${uniqueNumbers.toArray.mkString(", ")}")

  // Starting from a Map
  println("\nFrom Map:")
  println(s"Map to List: ${letterCodes.toList}")
  println(s"Map keys to Set: ${letterCodes.keySet}")
  println(s"Map values to List: ${letterCodes.values.toList}")

  // A String converts to collections of its characters.
  val greeting = "hello"
  println(s"\nString to List: ${greeting.toList}")
  println(s"String to Vector: ${greeting.toVector}")
  println(s"String to Set: ${greeting.toSet}")

  // A Range converts to the collections of its elements.
  val oneToTen = 1 to 10
  println(s"\nRange to List: ${oneToTen.toList}")
  println(s"Range to Vector: ${oneToTen.toVector}")
  println(s"Range to Set: ${oneToTen.toSet}")
}
}

Collection Performance Comparison
Performance Characteristics
scala
object CollectionPerformance {
// Entry point: times creation, random access, prepend, append and summation
// for List, Vector and Array (one measurement each, reported in whole
// milliseconds), then prints a short list of usage guidelines.
def main(args: Array[String]): Unit = {
  val size = 100000

  // Evaluates `operation` once, prints "<name>: <elapsed> ms" with the name
  // right-aligned in 20 columns, and hands the operation's result back.
  def timeOperation[T](name: String)(operation: => T): T = {
    val startedAt = System.nanoTime()
    val result = operation
    val elapsedMs = (System.nanoTime() - startedAt) / 1000000
    println(f"$name%20s: $elapsedMs%6d ms")
    result
  }

  println("Collection Performance Comparison:")
  println("=" * 50)

  // Creation: the built collections are reused by the sections below.
  println("\nCreation Performance:")
  val sampleList = timeOperation("List creation")(List.range(1, size))
  val sampleVector = timeOperation("Vector creation")(Vector.range(1, size))
  val sampleArray = timeOperation("Array creation")(Array.range(1, size))

  // Random access at the middle index
  println("\nRandom Access Performance:")
  val middle = size / 2
  timeOperation("List access")(sampleList(middle))
  timeOperation("Vector access")(sampleVector(middle))
  timeOperation("Array access")(sampleArray(middle))

  // Prepend
  println("\nPrepend Performance:")
  timeOperation("List prepend")(0 :: sampleList)
  timeOperation("Vector prepend")(0 +: sampleVector)

  // Append
  println("\nAppend Performance:")
  val extra = size + 1
  timeOperation("List append")(sampleList :+ extra)
  timeOperation("Vector append")(sampleVector :+ extra)

  // Full traversal via sum
  println("\nIteration Performance:")
  timeOperation("List sum")(sampleList.sum)
  timeOperation("Vector sum")(sampleVector.sum)
  timeOperation("Array sum")(sampleArray.sum)

  // Usage guidelines
  println("\nMemory and Performance Guidelines:")
  Seq(
    "List: Best for sequential access, prepending",
    "Vector: Best for random access, general purpose",
    "Array: Best for performance-critical code, interop with Java",
    "Set: Best for membership testing, uniqueness",
    "Map: Best for key-value lookups"
  ).foreach(println)
}
}

Practical Application Examples
Data Processing Pipeline
scala
object DataProcessingPipeline {
/** One record of the sample data set used by the processing pipeline.
  *
  * Declared `final` so the generated `equals`/`hashCode`/`copy` cannot be
  * undermined by a subclass.
  *
  * @param name   person's name
  * @param age    age (compared against 28, 30 and 40 by the pipeline)
  * @param city   city used as the grouping key
  * @param salary salary used for the per-city and overall statistics
  */
final case class Person(name: String, age: Int, city: String, salary: Double)
// Entry point: runs a small analysis over a fixed list of people —
// per-city statistics for everyone aged 28 or older, overall salary
// statistics (average, median, min, max), and an age-bracket breakdown.
def main(args: Array[String]): Unit = {
  val people = List(
    Person("Alice", 25, "New York", 75000),
    Person("Bob", 30, "San Francisco", 95000),
    Person("Charlie", 35, "New York", 85000),
    Person("Diana", 28, "Boston", 70000),
    Person("Eve", 32, "San Francisco", 105000),
    Person("Frank", 29, "Boston", 68000)
  )

  println("Original data:")
  people.foreach(println)

  // Data processing pipeline: filter by age, group by city, then reduce each
  // group to a metric-name -> value map.
  val analysis: Map[String, Map[String, Double]] = people
    .filter(_.age >= 28)
    .groupBy(_.city)
    .view
    .mapValues { cityPeople =>
      val headCount = cityPeople.size
      val totalSalary = cityPeople.map(_.salary).sum
      Map(
        // The count is widened to Double on purpose: mixing an Int with the
        // Double metrics would type the values as AnyVal, which the "%.2f"
        // format used below does not accept.
        "count" -> headCount.toDouble,
        "avgAge" -> cityPeople.map(_.age).sum.toDouble / headCount,
        "avgSalary" -> totalSalary / headCount,
        "totalSalary" -> totalSalary
      )
    }
    .toMap

  println("\nAnalysis by city (age >= 28):")
  analysis.foreach { case (city, stats) =>
    println(s"$city:")
    stats.foreach { case (metric, value) =>
      println(f"  $metric: $value%.2f")
    }
  }

  // Salary statistics over everyone (no age filter here).
  val salaryStats = people.map(_.salary)
  val sortedSalaries = salaryStats.sorted
  val middle = sortedSalaries.length / 2
  // Even count: mean of the two central values; odd count: the central value.
  val median =
    if (sortedSalaries.length % 2 == 0) (sortedSalaries(middle - 1) + sortedSalaries(middle)) / 2
    else sortedSalaries(middle)

  println("\nSalary Statistics:")
  println(f"Average: ${salaryStats.sum / salaryStats.length}%.2f")
  println(f"Median: $median%.2f")
  println(f"Min: ${salaryStats.min}%.2f")
  println(f"Max: ${salaryStats.max}%.2f")

  // Age distribution: under 30, under 40, everything else.
  val ageGroups = people.groupBy { person =>
    person.age match {
      case age if age < 30 => "20s"
      case age if age < 40 => "30s"
      case _ => "40+"
    }
  }

  println("\nAge Distribution:")
  // `members` rather than `people`, so the outer list is not shadowed.
  ageGroups.foreach { case (group, members) =>
    println(s"$group: ${members.map(_.name).mkString(", ")}")
  }
}
}

Cache System
scala
import scala.collection.mutable
object CacheSystem {
/** A least-recently-used cache backed by a `mutable.LinkedHashMap`.
  *
  * Entries are kept in usage order: the first entry of the map is the least
  * recently used one and the last entry is the most recently used one. Both a
  * successful `get` and every `put` move the affected key to the end.
  *
  * @param maxSize number of entries above which `put` evicts the first
  *                (least recently used) entry
  * @tparam K key type
  * @tparam V value type
  */
class LRUCache[K, V](maxSize: Int) {
  private val entries = mutable.LinkedHashMap.empty[K, V]

  /** Looks up `key`; on a hit the entry is re-inserted at the end so that it
    * counts as most recently used.
    *
    * @return `Some(value)` on a hit, `None` when the key is absent
    */
  def get(key: K): Option[V] = {
    val hit = entries.remove(key)
    hit.foreach(value => entries.update(key, value))
    hit
  }

  /** Stores `value` under `key` as the most recently used entry, evicting the
    * first entry when the size then exceeds `maxSize`.
    */
  def put(key: K, value: V): Unit = {
    // Remove first so that an existing key moves to the end instead of
    // keeping its old position.
    entries -= key
    entries += (key -> value)
    if (entries.size > maxSize) entries -= entries.head._1
  }

  /** Number of entries currently held. */
  def size: Int = entries.size

  /** Immutable snapshot of the keys currently held. */
  def keys: Set[K] = entries.keysIterator.toSet

  /** String form of the backing map, entries listed in usage order. */
  override def toString: String = entries.toString
}
// Entry point: fills a 3-entry LRU cache, shows how a read changes the
// eviction order, then replays a request sequence and reports the hit rate.
def main(args: Array[String]): Unit = {
  val cache = new LRUCache[String, Int](3)

  // Fill the cache to capacity.
  Seq("a" -> 1, "b" -> 2, "c" -> 3).foreach { case (key, value) => cache.put(key, value) }
  println(s"After adding a, b, c: $cache")

  // Reading 'a' makes it the most recently used entry.
  println(s"Get 'a': ${cache.get("a")}")
  println(s"After accessing 'a': $cache")

  // Adding a fourth entry evicts the oldest one, which is now 'b'.
  cache.put("d", 4)
  println(s"After adding 'd': $cache")

  // Replay the requests in order: a miss loads the key into the cache
  // (using the key's hash code as its value); each outcome is recorded
  // as true for a hit and false for a miss.
  val requests = List("a", "b", "c", "d", "a", "e", "f", "a")
  val outcomes = requests.map { key =>
    val wasHit = cache.get(key).isDefined
    if (!wasHit) cache.put(key, key.hashCode)
    wasHit
  }
  val hits = outcomes.count(identity)
  val misses = outcomes.size - hits

  println("\nCache performance:")
  println(s"Hits: $hits, Misses: $misses")
  println(s"Hit rate: ${hits.toDouble / (hits + misses) * 100}%")
}
}

Best Practices
Choose the right collection type:
- Need sequential access: List
- Need random access: Vector
- Need uniqueness: Set
- Need key-value mapping: Map
Immutable vs Mutable:
- Use immutable collections by default
- Consider mutable collections for performance-critical scenarios
- Prioritize immutable for functional programming
Performance considerations:
- List: suitable for prepend operations
- Vector: suitable for random access
- Array: suitable for performance-critical scenarios
Memory efficiency:
- Use view for lazy evaluation
- Avoid unnecessary intermediate collections
- Consider using Iterator to process large data
Functional programming:
- Use map, filter, reduce and other higher-order functions
- Chain operations for readability
- Avoid side effects
The Scala collection framework provides powerful and flexible data processing capabilities. Mastering these collection types and operation methods is key to writing efficient Scala programs.