Skip to content

Scala Extractors

Extractors are a powerful Scala feature that lets you define how values are extracted from an object. An extractor is any object with an unapply (or unapplySeq) method, and extractors are the foundation of pattern matching.

Basic Extractors

unapply Method

scala
/** Introductory extractors over `Int`, plus two functions that classify numbers with them. */
object BasicExtractors {

  /** Matches even integers and binds the matched number itself. */
  object Even {
    def unapply(n: Int): Option[Int] = Some(n).filter(_ % 2 == 0)
  }

  /** Matches odd integers (negative ones included) and binds the matched number itself. */
  object Odd {
    def unapply(n: Int): Option[Int] = Some(n).filter(_ % 2 != 0)
  }

  /** Matches perfect squares and binds the integer square root instead of the input. */
  object Square {
    def unapply(n: Int): Option[Int] = {
      // For negative input math.sqrt yields NaN, which toInt converts to 0, so no negative matches.
      val root = math.sqrt(n).toInt
      Some(root).filter(candidate => candidate * candidate == n)
    }
  }

  /** Boolean extractor: a pure test that binds nothing. Matches numbers greater than zero. */
  object Positive {
    def unapply(n: Int): Boolean = 0 < n
  }

  /** Boolean extractor: a pure test that binds nothing. Matches numbers less than zero. */
  object Negative {
    def unapply(n: Int): Boolean = 0 > n
  }

  /** Describes `n` as even or odd. Every `Int` satisfies exactly one of the two extractors. */
  def analyzeNumber(n: Int): String =
    n match {
      case Even(even) => s"$even is even"
      case Odd(odd) => s"$odd is odd"
    }

  /**
   * Describes `n` using the first extractor that matches, tried in the order
   * perfect square, positive, negative.
   */
  def analyzeSpecialNumbers(n: Int): String =
    n match {
      case Square(root) =>
        s"$n is a perfect square, square root is $root"
      case Positive() =>
        s"$n is positive"
      case Negative() =>
        s"$n is negative"
      // Zero itself is already taken by Square (root 0); this case closes the match.
      case _ =>
        s"$n is zero"
    }

  def main(args: Array[String]): Unit = {
    val samples = List(1, 2, 3, 4, 9, 16, -5, 0, 25)

    println("Number analysis:")
    for (n <- samples) println(s"$n -> ${analyzeNumber(n)}")

    println("\nSpecial number analysis:")
    for (n <- samples) println(s"$n -> ${analyzeSpecialNumbers(n)}")
  }
}

Multi-value Extractors

scala
/**
 * Extractors that pull several values out of a string, and a dispatcher
 * (`processInput`) that classifies free-form input with them.
 */
object MultiValueExtractors {
  import scala.util.Try

  /** Matches a string that splits on single spaces into exactly two parts: (first, last). */
  object FullName {
    def unapply(fullName: String): Option[(String, String)] =
      fullName.split(" ") match {
        case Array(first, last) => Some((first, last))
        case _ => None
      }
  }

  /** Variable-arity extractor: binds the whitespace-separated words of a sentence. */
  object Words {
    def unapplySeq(sentence: String): Option[Seq[String]] = {
      // A leading separator produces an empty first token, hence the filter.
      val words = sentence.split("\\s+").filter(_.nonEmpty)
      if (words.nonEmpty) Some(words.toSeq) else None
    }
  }

  /** Matches "start-end" where both sides are digit runs that fit in an `Int`. */
  object Range {
    // Compiled once instead of on every match attempt.
    private val RangePattern = """(\d+)-(\d+)""".r

    def unapply(input: String): Option[(Int, Int)] = input match {
      case RangePattern(start, end) =>
        // A digit run too long for Int makes toInt throw; report that as "no match".
        Try((start.toInt, end.toInt)).toOption
      case _ => None
    }
  }

  /** Matches "(x, y)" with optional whitespace after the comma and binds both numbers. */
  object Coordinate {
    // Compiled once instead of on every match attempt.
    private val CoordPattern = """\((-?\d+\.?\d*),\s*(-?\d+\.?\d*)\)""".r

    def unapply(input: String): Option[(Double, Double)] = input match {
      case CoordPattern(x, y) => Some((x.toDouble, y.toDouble))
      case _ => None
    }
  }

  /**
   * Classifies `input` with the first extractor that matches.
   *
   * The strict, regex-based formats are tried before the loose word-based
   * ones: `FullName` and `Words` accept any text containing a space, so if
   * they ran first a coordinate such as "(3.14, 2.71)" would be reported as
   * a name.
   */
  def processInput(input: String): String = input match {
    case Range(start, end) =>
      s"Range: From $start to $end"

    case Coordinate(x, y) =>
      s"Coordinate: ($x, $y)"

    case FullName(first, last) =>
      s"Name: $first $last"

    case Words(first, second, rest @ _*) =>
      s"Sentence: First word='$first', Second word='$second', Rest=${rest.mkString(", ")}"

    case _ =>
      s"Unrecognized format: $input"
  }

  def main(args: Array[String]): Unit = {
    val inputs = List(
      "John Doe",
      "The quick brown fox jumps",
      "1-100",
      "(3.14, 2.71)",
      "single",
      "invalid format"
    )

    inputs.foreach(input => println(s"'$input' -> ${processInput(input)}"))
  }
}

Advanced Extractor Patterns

Nested Extractors

scala
/** Shows extractors nested inside case-class patterns to classify a `Person`. */
object NestedExtractors {
  case class Address(street: String, city: String, zipCode: String)
  case class Person(name: String, age: Int, address: Address)

  /** Maps an age to "Minor" (< 18), "Adult" (< 65) or "Senior". Always matches. */
  object AgeGroup {
    def unapply(age: Int): Option[String] = age match {
      case a if a < 18 => Some("Minor")
      case a if a < 65 => Some("Adult")
      case _ => Some("Senior")
    }
  }

  /** Labels a city as first-tier when it is listed in `majorCities`. Always matches. */
  object CityType {
    val majorCities = Set("Beijing", "Shanghai", "Guangzhou", "Shenzhen")

    def unapply(city: String): Option[String] = {
      if (majorCities.contains(city)) Some("First-tier city")
      else Some("Other city")
    }
  }

  /** Maps the first two characters of a zip code to a region name. Always matches. */
  object ZipRegion {
    def unapply(zipCode: String): Option[String] = {
      zipCode.take(2) match {
        case "10" | "11" => Some("North China")
        case "20" | "21" => Some("East China")
        case "30" | "31" => Some("South China")
        case _ => Some("Other")
      }
    }
  }

  /**
   * Describes a person with the first matching case.
   *
   * `AgeGroup`, `CityType` and `ZipRegion` return `Some` for every input, so
   * the nested-extractor case accepts every `Person`. The narrower cases
   * (age guard, literal city) are therefore listed before it; placed after
   * it they could never be selected.
   */
  def analyzePerson(person: Person): String = person match {
    case Person(name, age, Address(street, city, _)) if age > 60 =>
      s"$name is a senior, lives on $street in $city"

    case Person(name, _, Address(_, "Beijing", _)) =>
      s"$name lives in the capital Beijing"

    // Nested pattern matching: each field is fed through its own extractor.
    case Person(name, AgeGroup(ageGroup), Address(_, CityType(cityType), ZipRegion(region))) =>
      s"$name is $ageGroup, lives in $cityType ($region region)"

    // Not selected while the three extractors above accept every input; kept
    // so the match stays total if one of them is ever made partial.
    case Person(name, age, _) =>
      s"$name, $age years old"
  }

  def main(args: Array[String]): Unit = {
    val people = List(
      Person("Zhang San", 25, Address("Zhongguancun Street", "Beijing", "100080")),
      Person("Li Si", 16, Address("Nanjing Road", "Shanghai", "200000")),
      Person("Wang Wu", 70, Address("Tianhe Road", "Guangzhou", "310000")),
      Person("Zhao Liu", 35, Address("Jiefang Road", "Wuhan", "430000"))
    )

    people.foreach(person => println(analyzePerson(person)))
  }
}

Conditional Extractors

scala
/** Extractors that validate or classify their input, with small drivers that use them. */
object ConditionalExtractors {

  /**
   * Matches strings containing both '@' and '.', binding the lower-cased address.
   * This is a deliberately loose plausibility check, not full address validation.
   */
  object ValidEmail {
    def unapply(email: String): Option[String] = {
      // Locale.ROOT keeps the result independent of the JVM default locale
      // (the Turkish locale, for example, lower-cases 'I' to a dotless 'ı').
      if (email.contains("@") && email.contains(".")) Some(email.toLowerCase(java.util.Locale.ROOT))
      else None
    }
  }

  /**
   * Matches passwords of at least 8 characters that contain an upper-case
   * letter, a lower-case letter, a digit and one of `!@#$%^&*`.
   */
  object StrongPassword {
    def unapply(password: String): Option[String] = {
      val hasUpper = password.exists(_.isUpper)
      val hasLower = password.exists(_.isLower)
      val hasDigit = password.exists(_.isDigit)
      val hasSpecial = password.exists("!@#$%^&*".contains(_))
      val isLongEnough = password.length >= 8

      if (hasUpper && hasLower && hasDigit && hasSpecial && isLongEnough) {
        Some(password)
      } else None
    }
  }

  /** Describes where a value lies relative to the range 0..100. Always matches. */
  object InRange {
    def unapply(value: Int): Option[String] = value match {
      case v if v >= 0 && v <= 100 => Some("Normal range")
      case v if v > 100 => Some("Exceeds upper limit")
      case _ => Some("Below lower limit")
    }
  }

  /**
   * Classifies a file name by the text after its last dot, ignoring case.
   * Always matches; unknown or missing extensions yield "Other".
   */
  object FileType {
    def unapply(filename: String): Option[String] = {
      val lastDot = filename.lastIndexOf('.')
      // A name without a dot has no extension: the whole name must not be
      // mistaken for one (a file called "jpg" is not an image).
      val extension =
        if (lastDot >= 0) filename.substring(lastDot + 1).toLowerCase(java.util.Locale.ROOT)
        else ""

      val category = extension match {
        case "jpg" | "jpeg" | "png" | "gif" => "Image"
        case "mp4" | "avi" | "mov" => "Video"
        case "txt" | "doc" | "pdf" => "Document"
        case "mp3" | "wav" | "flac" => "Audio"
        case _ => "Other"
      }
      Some(category)
    }
  }

  /** Reports which of the two credentials pass `ValidEmail` and `StrongPassword`. */
  def validateUser(email: String, password: String): String = (email, password) match {
    case (ValidEmail(validEmail), StrongPassword(_)) =>
      s"User validation successful: $validEmail"

    case (ValidEmail(_), _) =>
      "Email is valid, but password is not strong enough"

    case (_, StrongPassword(_)) =>
      "Password strength is sufficient, but email is invalid"

    case _ =>
      "Both email and password do not meet requirements"
  }

  /** Formats the `InRange` status of `value`. */
  def analyzeValue(value: Int): String = value match {
    case InRange(status) => s"Value $value: $status"
  }

  /** Formats the `FileType` category of `filename`. */
  def classifyFile(filename: String): String = filename match {
    case FileType(fileType) => s"File '$filename' is $fileType type"
  }

  def main(args: Array[String]): Unit = {
    // User validation tests
    val userTests = List(
      ("user@example.com", "StrongPass123!"),
      ("invalid-email", "StrongPass123!"),
      ("user@example.com", "weak"),
      ("invalid", "weak")
    )

    println("User validation tests:")
    userTests.foreach { case (email, password) =>
      println(s"$email, $password -> ${validateUser(email, password)}")
    }

    // Numeric range tests
    println("\nNumeric range tests:")
    List(-10, 50, 150).foreach(value => println(analyzeValue(value)))

    // File type tests
    println("\nFile type tests:")
    List("photo.jpg", "video.mp4", "document.pdf", "music.mp3", "data.csv")
      .foreach(filename => println(classifyFile(filename)))
  }
}

Custom Data Structure Extractors

Linked List Extractors

scala
/** Extractors over two hand-written recursive structures: a cons list and a binary tree. */
object CustomDataStructureExtractors {
  import PartialFunction.condOpt

  // Custom linked list
  sealed trait MyList[+T]
  case object MyNil extends MyList[Nothing]
  case class MyCons[T](head: T, tail: MyList[T]) extends MyList[T]

  /** Factory and head/tail extractor for `MyList`. */
  object MyList {
    /** Builds a list that holds `elements` in the given order. */
    def apply[T](elements: T*): MyList[T] = {
      val empty: MyList[T] = MyNil
      elements.foldRight(empty)((element, rest) => MyCons(element, rest))
    }

    /** Binds (head, tail) for a non-empty list; does not match `MyNil`. */
    def unapply[T](list: MyList[T]): Option[(T, MyList[T])] =
      condOpt(list) { case MyCons(first, rest) => (first, rest) }
  }

  /** Matches a list with exactly one element and binds that element. */
  object SingleElement {
    def unapply[T](list: MyList[T]): Option[T] =
      condOpt(list) { case MyCons(only, MyNil) => only }
  }

  /** Matches a list with at least two elements and binds the first two. */
  object FirstTwo {
    def unapply[T](list: MyList[T]): Option[(T, T)] =
      condOpt(list) { case MyCons(a, MyCons(b, _)) => (a, b) }
  }

  // Binary tree
  sealed trait BinaryTree[+T]
  case object Empty extends BinaryTree[Nothing]
  case class Node[T](value: T, left: BinaryTree[T], right: BinaryTree[T]) extends BinaryTree[T]

  /** Matches a node whose two subtrees are both `Empty` and binds its value. */
  object Leaf {
    def unapply[T](tree: BinaryTree[T]): Option[T] =
      condOpt(tree) { case Node(payload, Empty, Empty) => payload }
  }

  /** Matches a node whose right subtree is `Empty`; binds (value, left subtree). */
  object LeftChild {
    def unapply[T](tree: BinaryTree[T]): Option[(T, BinaryTree[T])] =
      condOpt(tree) { case Node(payload, subtree, Empty) => (payload, subtree) }
  }

  /** Matches a node whose left subtree is `Empty`; binds (value, right subtree). */
  object RightChild {
    def unapply[T](tree: BinaryTree[T]): Option[(T, BinaryTree[T])] =
      condOpt(tree) { case Node(payload, Empty, subtree) => (payload, subtree) }
  }

  /** Describes a list using the first extractor that matches. */
  def analyzeList[T](list: MyList[T]): String =
    list match {
      case MyNil =>
        "Empty list"
      case SingleElement(only) =>
        s"Single element list: $only"
      case FirstTwo(a, b) =>
        s"First two elements: $a, $b"
      // FirstTwo already accepts every list with two or more elements, so
      // this case only closes the match.
      case MyList(first, rest) =>
        s"Head element: $first, Tail: ${analyzeList(rest)}"
    }

  /** Describes a tree recursively; the more specific node shapes are tested first. */
  def analyzeTree[T](tree: BinaryTree[T]): String =
    tree match {
      case Empty =>
        "Empty tree"
      case Leaf(v) =>
        s"Leaf node: $v"
      case LeftChild(v, l) =>
        s"Node with only left subtree: $v, left subtree: ${analyzeTree(l)}"
      case RightChild(v, r) =>
        s"Node with only right subtree: $v, right subtree: ${analyzeTree(r)}"
      case Node(v, l, r) =>
        s"Full node: $v, left: ${analyzeTree(l)}, right: ${analyzeTree(r)}"
    }

  def main(args: Array[String]): Unit = {
    // Test linked list
    val lists = List(
      MyNil,
      MyList(1),
      MyList(1, 2),
      MyList(1, 2, 3, 4)
    )

    println("Linked list analysis:")
    for (list <- lists) println(analyzeList(list))

    // Test binary tree
    val trees = List(
      Empty,
      Node(1, Empty, Empty),  // Leaf
      Node(1, Node(2, Empty, Empty), Empty),  // Only left subtree
      Node(1, Empty, Node(3, Empty, Empty)),  // Only right subtree
      Node(1, Node(2, Empty, Empty), Node(3, Empty, Empty))  // Full tree
    )

    println("\nBinary tree analysis:")
    for (tree <- trees) println(analyzeTree(tree))
  }
}

Practical Application Examples

URL Parser

scala
/** Parses http/https URL strings with an extractor and classifies the result. */
object URLParser {
  import scala.util.Try

  /**
   * A parsed URL.
   *
   * @param protocol "http" or "https"
   * @param host     host name or address as written in the input
   * @param port     explicit port, if the input contained one
   * @param path     path component; "/" when the input had none
   * @param query    query parameters by name (text after '?', split on '&' and the first '=')
   */
  case class URL(protocol: String, host: String, port: Option[Int], path: String, query: Map[String, String])

  object URL {
    // Compiled once instead of on every match attempt.
    private val UrlPattern = """^(https?):\/\/([^:\/\s]+)(?::(\d+))?([^?\s]*)(?:\?(.*))?$""".r

    // Largest valid TCP/UDP port number.
    private val MaxPort = 65535

    /**
     * Parses `urlString`. Returns `None` when the text does not have the
     * expected shape or when its port is not a number in 0..65535.
     */
    def unapply(urlString: String): Option[URL] = urlString match {
      case UrlPattern(protocol, host, portStr, path, queryStr) =>
        parsePort(portStr).map { port =>
          // The regex leaves optional groups that did not participate as null.
          val query = parseQuery(Option(queryStr).getOrElse(""))
          URL(protocol, host, port, if (path.isEmpty) "/" else path, query)
        }
      case _ => None
    }

    /**
     * Interprets the optional port group of the regex.
     *
     * @return `Some(None)` when no port was written, `Some(Some(p))` for a
     *         valid port, and `None` when the digits overflow `Int` or
     *         exceed `MaxPort`.
     */
    private def parsePort(portStr: String): Option[Option[Int]] =
      Option(portStr) match {
        case None => Some(None)
        case Some(digits) =>
          Try(digits.toInt).toOption.filter(_ <= MaxPort).map(Some(_))
      }

    /** Splits "a=1&b=2" into a map; a parameter without '=' maps to the empty string. */
    private def parseQuery(queryString: String): Map[String, String] =
      queryString
        .split("&")
        .filter(_.nonEmpty) // skip empty segments such as the middle of "a=1&&b=2"
        .map { param =>
          param.split("=", 2) match {
            case Array(key, value) => key -> value
            case parts => parts(0) -> ""
          }
        }
        .toMap
  }

  /** Matches URLs whose protocol is "https". */
  object HttpsURL {
    def unapply(url: URL): Option[URL] = {
      if (url.protocol == "https") Some(url) else None
    }
  }

  /** Matches URLs whose host is "localhost" or "127.0.0.1". */
  object LocalURL {
    def unapply(url: URL): Option[URL] = {
      if (url.host == "localhost" || url.host == "127.0.0.1") Some(url) else None
    }
  }

  /**
   * Describes `urlString`. The string is parsed once; the parsed value is
   * then classified as HTTPS first, local second, and regular otherwise.
   */
  def analyzeURL(urlString: String): String = urlString match {
    case URL(url) => describe(url)
    case _ => s"Invalid URL: $urlString"
  }

  /** Renders the description for an already parsed URL. */
  private def describe(parsed: URL): String = parsed match {
    case HttpsURL(url) =>
      s"Secure HTTPS connection: ${url.host}${url.path}"

    case LocalURL(url) =>
      s"Local connection: ${url.protocol}://${url.host}:${url.port.getOrElse("default port")}"

    case url =>
      s"Regular URL: ${url.protocol}://${url.host}${url.path}" +
      (if (url.query.nonEmpty) s", query parameters: ${url.query}" else "")
  }

  def main(args: Array[String]): Unit = {
    val urls = List(
      "https://www.example.com/path?param=value",
      "http://localhost:8080/api/users",
      "https://api.github.com/repos/owner/repo",
      "http://127.0.0.1:3000/",
      "invalid-url"
    )

    urls.foreach(url => println(s"$url -> ${analyzeURL(url)}"))
  }
}

Log Parser

scala
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter

/** Parses log lines in two textual formats and classifies them with nested extractors. */
object LogParser {
  import java.time.format.{DateTimeParseException, ResolverStyle}

  /**
   * One parsed log line.
   *
   * @param thread thread name; present only for lines in the threaded format
   */
  case class LogEntry(
    timestamp: LocalDateTime,
    level: String,
    logger: String,
    message: String,
    thread: Option[String] = None
  )

  // Shared by both line formats. STRICT makes impossible dates (such as
  // 2023-02-30) fail instead of being adjusted to a nearby valid date, which
  // the default resolver style would do. Strict resolution needs the 'u'
  // (year) pattern letter rather than 'y' (year-of-era).
  private val TimestampFormat: DateTimeFormatter =
    DateTimeFormatter.ofPattern("uuuu-MM-dd HH:mm:ss").withResolverStyle(ResolverStyle.STRICT)

  /** Parses a timestamp; `None` when the text is not a real date-time. */
  private def parseTimestamp(text: String): Option[LocalDateTime] =
    try Some(LocalDateTime.parse(text, TimestampFormat))
    catch { case _: DateTimeParseException => None }

  /** Matches "timestamp [LEVEL] Logger: message". */
  object StandardLog {
    private val pattern = """(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (\w+): (.+)""".r

    def unapply(logLine: String): Option[LogEntry] = logLine match {
      case pattern(timestampStr, level, logger, message) =>
        parseTimestamp(timestampStr).map(LogEntry(_, level, logger, message))
      case _ => None
    }
  }

  /** Matches "timestamp [LEVEL] [thread] Logger: message". */
  object ThreadedLog {
    private val pattern = """(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] \[([^\]]+)\] (\w+): (.+)""".r

    def unapply(logLine: String): Option[LogEntry] = logLine match {
      case pattern(timestampStr, level, thread, logger, message) =>
        parseTimestamp(timestampStr).map(LogEntry(_, level, logger, message, Some(thread)))
      case _ => None
    }
  }

  /** Matches entries whose level is exactly "ERROR". */
  object ErrorLog {
    def unapply(entry: LogEntry): Option[LogEntry] = {
      if (entry.level == "ERROR") Some(entry) else None
    }
  }

  /** Matches entries whose level is exactly "WARN". */
  object WarningLog {
    def unapply(entry: LogEntry): Option[LogEntry] = {
      if (entry.level == "WARN") Some(entry) else None
    }
  }

  /**
   * Matches entries stamped later than one hour before the current system
   * time. The clock is read on every call, so the result depends on when
   * the match runs.
   */
  object RecentLog {
    def unapply(entry: LogEntry): Option[LogEntry] = {
      val now = LocalDateTime.now()
      val oneHourAgo = now.minusHours(1)
      if (entry.timestamp.isAfter(oneHourAgo)) Some(entry) else None
    }
  }

  /**
   * Describes a log line. The line is parsed once (threaded format first,
   * then standard); the parsed entry is then classified as error, warning,
   * recent, or plain, in that order.
   */
  def analyzeLogEntry(logLine: String): String = logLine match {
    // Binding the thread name in the pattern avoids calling Option.get later.
    case ThreadedLog(entry @ LogEntry(_, _, _, _, Some(thread))) =>
      describeThreaded(entry, thread)

    case StandardLog(entry) =>
      describeStandard(entry)

    case _ =>
      s"❓ Unable to parse log: $logLine"
  }

  /** Renders an entry that was parsed from the threaded format. */
  private def describeThreaded(entry: LogEntry, thread: String): String = entry match {
    case ErrorLog(_) =>
      s"🔴 Thread error: [$thread] ${entry.logger} - ${entry.message}"
    case WarningLog(_) =>
      s"🟡 Thread warning: [$thread] ${entry.logger} - ${entry.message}"
    case RecentLog(_) =>
      s"🕐 Recent thread log: [$thread] ${entry.level} - ${entry.message}"
    case _ =>
      s"📝 Thread log: [$thread] ${entry.level} - ${entry.logger}"
  }

  /** Renders an entry that was parsed from the standard format. */
  private def describeStandard(entry: LogEntry): String = entry match {
    case ErrorLog(_) =>
      s"🔴 Error: ${entry.logger} - ${entry.message}"
    case WarningLog(_) =>
      s"🟡 Warning: ${entry.logger} - ${entry.message}"
    case RecentLog(_) =>
      s"🕐 Recent log: ${entry.level} - ${entry.message}"
    case _ =>
      s"📝 Standard log: ${entry.level} - ${entry.logger}"
  }

  def main(args: Array[String]): Unit = {
    val logLines = List(
      "2023-12-25 10:30:45 [INFO] UserService: User login successful",
      "2023-12-25 10:31:02 [ERROR] DatabaseService: Connection timeout",
      "2023-12-25 10:31:15 [WARN] [main-thread] CacheService: Cache miss for key: user_123",
      "2023-12-25 10:31:30 [ERROR] [worker-1] PaymentService: Payment processing failed",
      "Invalid log format",
      "2023-12-25 10:32:00 [DEBUG] SecurityService: Token validation passed"
    )

    println("Log analysis results:")
    logLines.foreach(line => println(analyzeLogEntry(line)))
  }
}

Best Practices

Extractor Design Principles

scala
/** Small extractors that each illustrate one design guideline, plus a demo driver. */
object ExtractorBestPractices {

  // 1. Keep extractors simple and focused
  /** Binds the text after the first '@', provided something stands on both sides of it. */
  object EmailDomain {
    def unapply(email: String): Option[String] = {
      val atIndex = email.indexOf('@')
      if (atIndex > 0 && atIndex < email.length - 1) {
        Some(email.substring(atIndex + 1))
      } else None
    }
  }

  // 2. Provide meaningful return values
  /** Maps a Celsius temperature to a descriptive label. Always matches. */
  object Temperature {
    def unapply(celsius: Double): Option[String] = celsius match {
      case c if c < 0 => Some("Below freezing point")
      case c if c < 10 => Some("Cold")
      case c if c < 25 => Some("Cool")
      case c if c < 35 => Some("Warm")
      case _ => Some("Hot")
    }
  }

  // 3. Consider performance, avoid complex computations
  /** Boolean extractor for primes: a table lookup up to 31, trial division above it. */
  object FastPrime {
    private val knownPrimes = Set(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

    def unapply(n: Int): Boolean = {
      if (n <= 31) knownPrimes.contains(n)
      else isPrime(n)  // Only do complex computation for large numbers
    }

    private def isPrime(n: Int): Boolean = {
      if (n < 2) false
      else !(2 to math.sqrt(n).toInt).exists(n % _ == 0)
    }
  }

  // 4. Combine extractors
  /**
   * Matches a (name, email, age) triple when all three fields are plausible
   * and binds the fields unchanged.
   *
   *  - name: at least two characters once surrounding whitespace is removed
   *  - email: accepted by `EmailDomain`, with a '.' in the domain part
   *  - age: within 0..150
   */
  object ValidUser {
    def unapply(input: (String, String, Int)): Option[(String, String, Int)] = {
      val (name, email, age) = input

      // Measure the trimmed name so that padding cannot satisfy the length rule.
      val validName = name.trim.length >= 2
      // Reuse EmailDomain so both sides of '@' must be non-empty.
      val validEmail = email match {
        case EmailDomain(domain) => domain.contains(".")
        case _ => false
      }
      val validAge = age >= 0 && age <= 150

      if (validName && validEmail && validAge) Some((name, email, age))
      else None
    }
  }

  // 5. Error handling
  /** Binds the integer value of a string; does not match text that is not a valid `Int`. */
  object SafeInt {
    def unapply(s: String): Option[Int] = {
      try {
        Some(s.toInt)
      } catch {
        case _: NumberFormatException => None
      }
    }
  }

  /** Prints one demonstration per extractor defined above. */
  def demonstrateBestPractices(): Unit = {
    // Email domain extraction
    val emails = List("user@gmail.com", "admin@company.org", "invalid-email")
    emails.foreach {
      case email @ EmailDomain(domain) => println(s"$email's domain is $domain")
      case email => println(s"$email is not a valid email")
    }

    // Temperature classification
    val temperatures = List(-5.0, 5.0, 20.0, 30.0, 40.0)
    temperatures.foreach {
      case temp @ Temperature(category) => println(s"${temp}°C is $category")
    }

    // Prime checking
    val numbers = List(2, 4, 17, 25, 29)
    numbers.foreach {
      case n @ FastPrime() => println(s"$n is prime")
      case n => println(s"$n is not prime")
    }

    // User validation
    val users = List(
      ("Alice", "alice@example.com", 25),
      ("", "invalid", -5),
      ("Bob", "bob@test.com", 30)
    )

    users.foreach {
      case ValidUser(name, email, age) => println(s"Valid user: $name, $email, $age")
      case (name, email, age) => println(s"Invalid user: $name, $email, $age")
    }

    // Safe integer parsing
    val numberStrings = List("123", "abc", "456")
    numberStrings.foreach {
      case SafeInt(number) => println(s"Parsing successful: $number")
      case str => println(s"Parsing failed: $str")
    }
  }

  def main(args: Array[String]): Unit = {
    demonstrateBestPractices()
  }
}

Summary

Extractors are the core mechanism of Scala pattern matching:

  1. Basic Concepts:

    • The unapply method defines how values are extracted
    • It returns Option[T] to extract values, or Boolean for a pure test
    • unapplySeq extracts a variable number of values
  2. Design Principles:

    • Keep simple and focused
    • Provide meaningful return values
    • Consider performance impact
    • Proper error handling
  3. Application Scenarios:

    • Data validation and parsing
    • Pattern matching enhancement
    • Domain-specific languages
    • API design
  4. Best Practices:

    • Combine simple extractors
    • Avoid side effects
    • Consider type safety
    • Provide clear documentation

Extractors make pattern matching more powerful and flexible, and are an important tool for functional programming in Scala.

Content is for learning and research only.