Skip to content

Scala Iterators

Iterators are important tools in Scala for traversing collection elements. They process data lazily, computing each element only when it is requested, which makes them particularly suitable for handling large datasets or infinite sequences.

Iterator Basics

Creating Iterators

scala
/** Shows the standard ways of obtaining an `Iterator`: from a collection, from factory
  * methods on the `Iterator` companion, and as unbounded generators.
  */
object IteratorCreation {
  def main(args: Array[String]): Unit = {
    // Drains `it` into a List and prints it after `label`.
    def show[A](label: String, it: Iterator[A]): Unit =
      println(s"$label: ${it.toList}")

    // An iterator obtained from an existing collection.
    val fromList = List(1, 2, 3, 4, 5).iterator

    // Iterators built directly by the companion's factory methods.
    val direct = Iterator(1, 2, 3, 4, 5)
    val ranged = Iterator.range(1, 6)
    val filled = Iterator.fill(5)(0)
    val squares = Iterator.tabulate(5)(i => i * i)

    println("Iterators created from different sources:")
    show("From list", fromList)
    show("Direct creation", direct)
    show("Range", ranged)
    show("Fill", filled)
    show("Tabulate", squares)

    // No elements at all.
    show("Empty iterator", Iterator.empty[Int])

    // Exactly one element.
    show("Single element", Iterator.single(42))

    // Unbounded counter starting at 1; `take` bounds it before it is materialised.
    show("First 10 from infinite", Iterator.from(1).take(10))

    // Unbounded repetition of the same value.
    show("First 5 repeated", Iterator.continually("hello").take(5))
  }
}

Basic Iterator Operations

scala
/** Walks through the `hasNext` / `next()` protocol and the usual ways of traversing an iterator. */
object BasicIteratorOperations {
  def main(args: Array[String]): Unit = {
    val firstPass = Iterator(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

    // Ask whether anything is left before pulling.
    println(s"Has next: ${firstPass.hasNext}")

    // Pull exactly one element, guarded by hasNext.
    if (firstPass.hasNext) {
      val head = firstPass.next()
      println(s"Next element: $head")
    }

    // An iterator is single-pass: the element pulled above is no longer available.
    println(s"Remaining elements: ${firstPass.toList}")

    // Each further traversal needs a fresh iterator.

    // Traversal with foreach.
    print("Foreach: ")
    Iterator(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).foreach { item => print(s"$item ") }
    println()

    // Traversal with an explicit while loop.
    print("While loop: ")
    val manual = Iterator(1, 2, 3, 4, 5)
    while (manual.hasNext) {
      val item = manual.next()
      print(s"$item ")
    }
    println()

    // Traversal with a for loop.
    print("For loop: ")
    for (item <- Iterator(1, 2, 3, 4, 5)) print(s"$item ")
    println()
  }
}

Iterator Transformation Operations

Map and Filter

scala
/** Demonstrates the element-wise and positional transformations available on `Iterator`. */
object IteratorTransformations {
  def main(args: Array[String]): Unit = {
    // Every demo gets its own iterator because an iterator can be consumed only once.
    def oneToTwenty: Iterator[Int] = Iterator.range(1, 21)

    // map: apply a function to every element.
    val doubled = Iterator(1, 2, 3, 4, 5).map(n => n * 2)
    println(s"Doubled: ${doubled.toList}")

    // filter: keep only the elements satisfying the predicate.
    val evens = Iterator(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).filter(n => n % 2 == 0)
    println(s"Even numbers: ${evens.toList}")

    // flatMap: map each element to a sequence and flatten the results.
    val chars = Iterator("hello", "world").flatMap(word => word.iterator)
    println(s"All characters: ${chars.toList}")

    // collect: filter and map in one step through a partial function.
    val mixed: Iterator[Any] = Iterator(1, "hello", 2, "world", 3)
    val numbersOnly = mixed.collect { case n: Int => n * 2 }
    println(s"Numbers only (doubled): ${numbersOnly.toList}")

    // take / drop: select by position.
    println(s"First 5: ${oneToTwenty.take(5).toList}")
    println(s"Elements 6-10: ${oneToTwenty.drop(5).take(5).toList}")

    // takeWhile / dropWhile: select by a leading predicate.
    println(s"Less than 8: ${oneToTwenty.takeWhile(n => n < 8).toList}")
    println(s"After dropping < 8, take 5: ${oneToTwenty.dropWhile(n => n < 8).take(5).toList}")
  }
}

Aggregation Operations

scala
/** Demonstrates operations that consume an iterator and collapse it to a single value. */
object IteratorAggregations {
  def main(args: Array[String]): Unit = {
    // Each aggregation exhausts (or partially advances) its iterator,
    // so every demo builds a new one.
    def oneToFive: Iterator[Int] = Iterator(1, 2, 3, 4, 5)
    def oneToTen: Iterator[Int] = Iterator(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    def evensOnly: Iterator[Int] = Iterator(2, 4, 6, 8, 10)
    def unordered: Iterator[Int] = Iterator(5, 2, 8, 1, 9, 3)

    // reduce: combine the elements pairwise.
    println(s"Sum: ${oneToFive.reduce((a, b) => a + b)}")

    // fold: like reduce, but seeded with an initial value.
    println(s"Product: ${oneToFive.fold(1)((acc, n) => acc * n)}")

    // find: first element matching the predicate, as an Option.
    println(s"First even: ${oneToTen.find(n => n % 2 == 0)}")

    // exists / forall: does any / does every element match?
    println(s"Has even numbers: ${evensOnly.exists(n => n % 2 == 0)}")
    println(s"All even: ${evensOnly.forall(n => n % 2 == 0)}")

    // count: number of matching elements.
    println(s"Even count: ${oneToTen.count(n => n % 2 == 0)}")

    // min / max.
    println(s"Min: ${unordered.min}")
    println(s"Max: ${unordered.max}")

    // size walks the whole iterator, leaving it empty afterwards.
    println(s"Size: ${oneToFive.size}")
  }
}

Iterator Combinations

Concatenation and Grouping

scala
/** Demonstrates combining iterators (concatenate, zip) and splitting them
  * (partition, grouped, sliding).
  */
object IteratorCombination {
  def main(args: Array[String]): Unit = {
    // ++ : the elements of the left iterator followed by those of the right one.
    val joined = Iterator(1, 2, 3) ++ Iterator(4, 5, 6)
    println(s"Concatenated: ${joined.toList}")

    // zip: pair elements positionally; here the letters run out first.
    val pairs = Iterator('a', 'b', 'c', 'd').zip(Iterator(1, 2, 3, 4, 5))
    println(s"Zipped: ${pairs.toList}")

    // zipWithIndex: pair each element with its 0-based position.
    val numbered = Iterator("hello", "world", "scala").zipWithIndex
    println(s"With index: ${numbered.toList}")

    // partition: split into (matching, non-matching) iterators.
    val split = Iterator(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).partition(n => n % 2 == 0)
    println(s"Evens: ${split._1.toList}")
    println(s"Odds: ${split._2.toList}")

    // grouped: consecutive chunks of the given size.
    val chunks = Iterator(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).grouped(3)
    println("Grouped by 3:")
    for (chunk <- chunks) println(s"  ${chunk.toList}")

    // sliding: overlapping windows of the given size, advancing by one.
    val windows = Iterator(1, 2, 3, 4, 5, 6).sliding(3)
    println("Sliding window of 3:")
    for (window <- windows) println(s"  ${window.toList}")
  }
}

Lazy Evaluation and Performance

Lazy Evaluation Example

scala
/** Contrasts a lazy `Iterator` pipeline with the same pipeline run on a `List`.
  * Trace prints inside each stage show when the stage actually executes.
  */
object LazyEvaluation {
  def main(args: Array[String]): Unit = {
    println("Demonstrating lazy evaluation:")

    // Traced stages for the lazy pipeline: every call announces itself.
    def tracedKeep(x: Int): Boolean = {
      println(s"Filtering $x")
      x % 1000 == 0
    }
    def tracedDouble(x: Int): Int = {
      println(s"Mapping $x")
      x * 2
    }

    // Building the pipeline runs none of the stages; only the first 5 results are requested.
    val processed = Iterator
      .range(1, 1000000)
      .filter(x => tracedKeep(x))
      .map(x => tracedDouble(x))
      .take(5)

    println("Operations defined, but not executed yet")

    // Materialising the result is what drives the stages.
    println("Now executing:")
    val result = processed.toList
    println(s"Result: $result")

    println("\nCompare with List (eager evaluation):")

    // Traced stages for the eager pipeline; tracing is capped at 5000 to keep output readable.
    def eagerKeep(x: Int): Boolean = {
      if (x <= 5000) println(s"Eagerly filtering $x")
      x % 1000 == 0
    }
    def eagerDouble(x: Int): Int = {
      if (x <= 5000) println(s"Eagerly mapping $x")
      x * 2
    }

    // On a List each stage processes the whole collection before the next stage starts.
    val everything = List.range(1, 1000000)
    val kept = everything.filter(x => eagerKeep(x))
    val mapped = kept.map(x => eagerDouble(x))
    val eagerResult = mapped.take(5)

    println(s"Eager result: $eagerResult")
  }
}

Performance Comparison

scala
/** Rough wall-clock comparison of eager `List` pipelines with lazy `Iterator` pipelines.
  * Timings are single-shot measurements and only indicative.
  */
object IteratorPerformance {
  def main(args: Array[String]): Unit = {
    val size = 1000000

    /** Evaluates `operation` once, prints the elapsed time in whole milliseconds
      * under `name`, and returns the operation's result.
      */
    def timeOperation[T](name: String)(operation: => T): T = {
      val start = System.nanoTime()
      val result = operation
      val end = System.nanoTime()
      println(f"$name%25s: ${(end - start) / 1000000}%6d ms")
      result
    }

    println("Performance Comparison: Iterator vs List")
    println("=" * 50)

    // Creation performance: the List allocates every element up front.
    val list = timeOperation("List creation") {
      (1 to size).toList
    }

    // Creating the iterator produces no elements; the result is intentionally not kept.
    timeOperation("Iterator creation") {
      Iterator.range(1, size + 1)
    }

    // Filter and map performance (lazy vs eager)
    timeOperation("List filter+map+take") {
      list.filter(_ % 2 == 0).map(_ * 2).take(100)
    }

    timeOperation("Iterator filter+map+take") {
      Iterator.range(1, size + 1).filter(_ % 2 == 0).map(_ * 2).take(100).toList
    }

    // Memory usage
    println("\nMemory Usage:")
    println("List: Stores all elements in memory")
    println("Iterator: Generates elements on demand")

    /** Sums the squares of the first 100 multiples of 1000 without materialising the range. */
    def processLargeDataset(): Unit = {
      val result = Iterator.range(1, 10000000)
        .filter(_ % 1000 == 0)
        // Square as Long: x reaches 100000, whose square (10^10) does not fit in an Int,
        // and the total (338350000000) does not fit either.
        .map(x => x.toLong * x)
        .take(100)
        .sum

      println(s"Processed large dataset result: $result")
    }

    timeOperation("Large dataset processing") {
      processLargeDataset()
    }
  }
}

Custom Iterators

Creating Custom Iterators

scala
/** Hand-written infinite iterators (Fibonacci numbers and primes) and a small demo combining them. */
object CustomIterators {
  /** Infinite iterator over the Fibonacci sequence 0, 1, 1, 2, 3, 5, ...
    *
    * Values are `Long`s: the first 93 elements (F(0) to F(92)) are exact, after which the
    * additions wrap around silently.
    */
  class FibonacciIterator extends Iterator[Long] {
    private var current = 0L
    // Deliberately not called `next`: a field with that name would be a double
    // definition alongside the `next()` method and fail to compile.
    private var upcoming = 1L

    def hasNext: Boolean = true  // Infinite sequence

    def next(): Long = {
      val result = current
      val sum = current + upcoming
      current = upcoming
      upcoming = sum
      result
    }
  }

  /** Infinite iterator over the primes 2, 3, 5, 7, ... found by trial division. */
  class PrimeIterator extends Iterator[Int] {
    // Next candidate to test; everything below it has already been emitted or rejected.
    private var current = 2

    def hasNext: Boolean = true  // Infinite sequence

    def next(): Int = {
      while (!isPrime(current)) {
        current += 1
      }
      val result = current
      current += 1
      result
    }

    /** True when `n` is at least 2 and has no divisor in 2..sqrt(n). */
    private def isPrime(n: Int): Boolean = {
      if (n < 2) false
      else !(2 to math.sqrt(n).toInt).exists(n % _ == 0)
    }
  }

  // Use companion object to create factory methods
  object FibonacciIterator {
    def apply(): FibonacciIterator = new FibonacciIterator()
  }

  object PrimeIterator {
    def apply(): PrimeIterator = new PrimeIterator()
  }

  def main(args: Array[String]): Unit = {
    // Use Fibonacci iterator
    val fibonacci = FibonacciIterator()
    println(s"First 15 Fibonacci numbers: ${fibonacci.take(15).toList}")

    // Use prime iterator
    val primes = PrimeIterator()
    println(s"First 20 prime numbers: ${primes.take(20).toList}")

    // Combine custom iterators: which of the first 100 Fibonacci numbers are among
    // the first 1000 primes?
    // The primes are generated once and widened to Long so the comparison never
    // truncates a Fibonacci number to Int.
    val knownPrimes: Set[Long] = PrimeIterator().take(1000).map(_.toLong).toSet
    val largestKnownPrime = knownPrimes.max

    val fibPrimes = FibonacciIterator()
      .take(100)
      // The sequence is increasing, so nothing past the largest known prime can match;
      // stopping here also stays clear of the Long overflow region.
      .takeWhile(_ <= largestKnownPrime)
      .filter(fib => knownPrimes.contains(fib))

    println(s"Fibonacci numbers that are also prime: ${fibPrimes.toList}")
  }
}

Iterator Factory Methods

scala
/** Factory functions that build iterators for numeric series, random data, file lines and trees. */
object IteratorFactories {
  /** Infinite iterator `start, start * ratio, start * ratio^2, ...`. */
  def geometricSeries(start: Double, ratio: Double): Iterator[Double] = {
    Iterator.iterate(start)(_ * ratio)
  }

  /** Infinite iterator of pseudo-random integers in `[0, 100)`.
    *
    * @param seed seed for the generator; the same seed yields the same sequence.
    *             Defaults to the current time in milliseconds.
    */
  def randomNumbers(seed: Long = System.currentTimeMillis()): Iterator[Int] = {
    val random = new scala.util.Random(seed)
    Iterator.continually(random.nextInt(100))
  }

  /** Iterator over the lines of `filename`, read on demand.
    *
    * The underlying file is closed automatically as soon as the iterator reports that
    * it has no more lines. A caller that abandons the iterator before exhausting it
    * leaves the file open, so consume it fully or use it only for short-lived work.
    */
  def fileLines(filename: String): Iterator[String] = {
    val source = scala.io.Source.fromFile(filename)
    val underlying = source.getLines()

    new Iterator[String] {
      // Flipped to false once the source has been closed, so it is closed only once
      // and never read afterwards.
      private var open = true

      def hasNext: Boolean = {
        val more = open && underlying.hasNext
        if (open && !more) {
          open = false
          source.close()
        }
        more
      }

      def next(): String =
        if (hasNext) underlying.next()
        else throw new NoSuchElementException("next on exhausted file line iterator")
    }
  }

  /** A tree node holding a value and an ordered list of child subtrees. */
  case class TreeNode[T](value: T, children: List[TreeNode[T]] = Nil)

  /** Pre-order, depth-first iterator: a node, then each child's subtree from left to right. */
  def depthFirstTraversal[T](root: TreeNode[T]): Iterator[T] = {
    def traverse(nodes: List[TreeNode[T]]): Iterator[T] = {
      nodes match {
        case Nil => Iterator.empty
        case head :: tail =>
          Iterator.single(head.value) ++ traverse(head.children) ++ traverse(tail)
      }
    }
    traverse(List(root))
  }

  /** Level-order (breadth-first) iterator: nodes closer to the root come first. */
  def breadthFirstTraversal[T](root: TreeNode[T]): Iterator[T] = {
    // `queue` holds the nodes still to visit; children are appended behind their siblings.
    def traverse(queue: List[TreeNode[T]]): Iterator[T] = {
      queue match {
        case Nil => Iterator.empty
        case head :: tail =>
          Iterator.single(head.value) ++ traverse(tail ++ head.children)
      }
    }
    traverse(List(root))
  }

  def main(args: Array[String]): Unit = {
    // Geometric progression
    val geometric = geometricSeries(1.0, 2.0)
    println(s"Geometric series (1, 2, 4, 8, ...): ${geometric.take(10).toList}")

    // Random numbers
    val random = randomNumbers(42)  // Fixed seed to get reproducible results
    println(s"Random numbers: ${random.take(10).toList}")

    // Tree traversal
    val tree = TreeNode(1, List(
      TreeNode(2, List(TreeNode(4), TreeNode(5))),
      TreeNode(3, List(TreeNode(6), TreeNode(7)))
    ))

    println(s"Depth-first traversal: ${depthFirstTraversal(tree).toList}")
    println(s"Breadth-first traversal: ${breadthFirstTraversal(tree).toList}")
  }
}

Practical Application Examples

Data Stream Processing

scala
/** Simulated log-stream processing: filtering, batching and sliding windows over an iterator. */
object DataStreamProcessing {
  /** One simulated log record. */
  case class LogEntry(timestamp: Long, level: String, message: String)

  /** Infinite stream of randomly generated log entries.
    *
    * Each entry gets a random level and message from fixed lists, and a timestamp of the
    * current time plus a random offset below 1000 ms.
    */
  def generateLogStream(): Iterator[LogEntry] = {
    val levels = Array("INFO", "WARN", "ERROR", "DEBUG")
    val messages = Array("User login", "Database query", "Cache miss", "Network timeout")
    val random = new scala.util.Random()

    Iterator.continually {
      LogEntry(
        System.currentTimeMillis() + random.nextInt(1000),
        levels(random.nextInt(levels.length)),
        messages(random.nextInt(messages.length))
      )
    }
  }

  /** Prints at most the first 5 `ERROR` entries found in `logs`, pausing 100 ms after each
    * one to simulate processing time. Consumes `logs` as far as needed to find them.
    */
  def processLogStream(logs: Iterator[LogEntry]): Unit = {
    val errorLogs = logs
      .filter(_.level == "ERROR")
      .take(5)  // Only process first 5 errors

    println("Processing error logs:")
    errorLogs.foreach { log =>
      println(s"[${log.timestamp}] ERROR: ${log.message}")
      Thread.sleep(100)  // Simulate processing time
    }
  }

  /** Splits `logs` into consecutive batches of `batchSize` entries; the last batch may be shorter.
    *
    * @param batchSize number of entries per batch; passed straight to `Iterator.grouped`.
    */
  def batchProcess(logs: Iterator[LogEntry], batchSize: Int): Iterator[List[LogEntry]] = {
    // `grouped` yields Seq batches; convert each one to satisfy the declared List element type.
    logs.grouped(batchSize).map(_.toList)
  }

  /** Produces overlapping windows of `windowSize` consecutive entries, advancing one entry at a time.
    *
    * @param windowSize number of entries per window; passed straight to `Iterator.sliding`.
    */
  def slidingWindowProcess(logs: Iterator[LogEntry], windowSize: Int): Iterator[List[LogEntry]] = {
    // `sliding` yields Seq windows; convert each one to satisfy the declared List element type.
    logs.sliding(windowSize).map(_.toList)
  }

  def main(args: Array[String]): Unit = {
    val logStream = generateLogStream()

    // Real-time processing
    println("Real-time processing:")
    processLogStream(logStream.take(20))

    // Batch processing example
    println("\nBatch processing:")
    val batches = batchProcess(generateLogStream().take(10), 3)
    batches.zipWithIndex.foreach { case (batch, index) =>
      println(s"Batch $index: ${batch.size} logs")
      batch.foreach(log => println(s"  ${log.level}: ${log.message}"))
    }

    // Sliding window processing
    println("\nSliding window processing:")
    val windows = slidingWindowProcess(generateLogStream().take(8), 3)
    windows.zipWithIndex.foreach { case (window, index) =>
      println(s"Window $index: ${window.map(_.level).mkString(", ")}")
    }
  }
}

File Processing

scala
import java.io.{File, PrintWriter}

/** File-processing examples built on the line iterator returned by `Source.getLines()`. */
object FileProcessing {
  /** Writes 1000 numbered lines, each ending in a random number below 100, to `filename`. */
  def createTestFile(filename: String): Unit = {
    val writer = new PrintWriter(new File(filename))
    try {
      (1 to 1000).foreach { i =>
        writer.println(s"Line $i: This is line number $i with some random data ${scala.util.Random.nextInt(100)}")
      }
    } finally {
      writer.close()
    }
  }

  /** Streams through `filename` and prints the total number of whitespace-separated
    * words found on the lines that contain the text "random".
    */
  def processLargeFile(filename: String): Unit = {
    val source = scala.io.Source.fromFile(filename)
    try {
      val lines = source.getLines()

      // Sum the word counts of the lines containing the specific word
      val wordCount = lines
        .filter(_.contains("random"))
        .map(_.split("\\s+").length)
        .sum

      println(s"Total words in lines containing 'random': $wordCount")
    } finally {
      source.close()
    }
  }

  /** Reads `filename` in chunks of `chunkSize` lines and prints, for each chunk, its index,
    * its line count and the average line length.
    *
    * @param chunkSize number of lines per chunk; passed straight to `Iterator.grouped`.
    */
  def processFileInChunks(filename: String, chunkSize: Int): Unit = {
    val source = scala.io.Source.fromFile(filename)
    try {
      val lines = source.getLines()
      val chunks = lines.grouped(chunkSize)

      chunks.zipWithIndex.foreach { case (chunk, index) =>
        val chunkList = chunk.toList
        val avgLength = chunkList.map(_.length).sum.toDouble / chunkList.size
        println(f"Chunk $index: ${chunkList.size} lines, avg length: $avgLength%.2f")
      }
    } finally {
      source.close()
    }
  }

  /** Copies the first 10 lines of `inputFile` that contain a digit to `outputFile`, upper-cased.
    *
    * Both files are closed on every path, including when opening the output file fails.
    */
  def filterAndTransform(inputFile: String, outputFile: String): Unit = {
    val source = scala.io.Source.fromFile(inputFile)
    try {
      // Opened inside the outer try so the source is still closed if this constructor throws.
      val writer = new PrintWriter(new File(outputFile))
      try {
        // Filter lines containing numbers, and convert to uppercase
        val processed = source.getLines()
          .filter(_.matches(".*\\d+.*"))
          .map(_.toUpperCase)
          .take(10)  // Only process first 10 lines

        processed.foreach(line => writer.println(line))

        println(s"Processed lines written to $outputFile")
      } finally {
        writer.close()
      }
    } finally {
      source.close()
    }
  }

  def main(args: Array[String]): Unit = {
    val testFile = "test_data.txt"
    val outputFile = "processed_data.txt"

    try {
      // Create test file
      createTestFile(testFile)
      println(s"Created test file: $testFile")

      // Process large file
      processLargeFile(testFile)

      // Process in chunks
      println("\nProcessing in chunks:")
      processFileInChunks(testFile, 100)

      // Filter and transform
      filterAndTransform(testFile, outputFile)
    } finally {
      // Clean up files even when one of the steps above fails
      new File(testFile).delete()
      new File(outputFile).delete()
    }
  }
}

Best Practices

  1. When to Use Iterators:

    • Processing large datasets
    • Need lazy evaluation
    • Memory usage is a key consideration
    • Processing infinite sequences
  2. Performance Considerations:

    • Iterator is one-time use, cannot be reused
    • Lazy evaluation can improve performance and memory efficiency
    • Avoid calling size or length on iterators: both consume the iterator, and neither returns on an infinite one
  3. Memory Management:

    • Iterator doesn't store all elements in memory
    • Suitable for processing stream data
    • Pay attention to resource management (like file handles)
  4. Functional Programming:

    • Use map, filter, flatMap and other operations
    • Avoid side effects
    • Take advantage of lazy evaluation of chain operations
  5. Error Handling:

    • Check hasNext to avoid NoSuchElementException
    • Use Option type to handle potentially null values
    • Properly manage resources (use try-finally or using pattern)

Iterators are powerful tools in Scala for processing data streams and large datasets, and mastering their usage is crucial for writing efficient programs.

Content is for learning and research only.