Skip to content

Ruby File I/O

File processing is a fundamental operation in programming. Ruby provides powerful and flexible file I/O capabilities. Whether reading configuration files, processing logs, manipulating data files, or performing filesystem operations, Ruby offers rich built-in methods to simplify these tasks. This chapter will provide a detailed introduction to various file processing and I/O operation methods and best practices in Ruby.

🎯 Basic File Operations

File Opening and Closing

ruby
# Basic file opening method
file = File.open("example.txt", "r")
content = file.read
file.close

# Using blocks to automatically close files (recommended method)
File.open("example.txt", "r") do |file|
  content = file.read
  puts content
end  # File automatically closed

# Different file modes
# "r"  - Read-only mode (default)
# "w"  - Write mode (overwrites file content)
# "a"  - Append mode
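# "r+" - Read-write mode (file must exist; starts at the beginning, nothing is truncated)
# "w+" - Read-write mode (truncates an existing file or creates a new one)
# "a+" - Read-append mode (file is created if missing; every write goes to the end)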

# Specify encoding
File.open("example.txt", "r:utf-8") do |file|
  content = file.read
  puts content
end

File Reading Operations

ruby
# Read entire file content
File.open("example.txt", "r") do |file|
  content = file.read
  puts content
end

# Read specified number of bytes from file
File.open("example.txt", "r") do |file|
  chunk = file.read(100)  # Read first 100 bytes
  puts chunk
end

# Read file line by line
File.open("example.txt", "r") do |file|
  file.each_line do |line|
    puts "Line: #{line.chomp}"  # chomp removes newline character
  end
end

# Read all lines to array
File.open("example.txt", "r") do |file|
  lines = file.readlines
  puts lines.inspect
end

# Read single line
File.open("example.txt", "r") do |file|
  first_line = file.gets
  puts "First line: #{first_line}"
end

# Use File.read shortcut method
content = File.read("example.txt")
puts content

# Use File.readlines shortcut method
lines = File.readlines("example.txt")
puts lines.inspect

✍️ File Writing Operations

Basic Write Operations

ruby
# Overwrite file
File.open("output.txt", "w") do |file|
  file.write("Hello, World!\n")
  file.write("This is the second line\n")
end

# Use puts to write (automatically adds newline)
File.open("output.txt", "w") do |file|
  file.puts("Hello, World!")
  file.puts("This is the second line")
end

# Append to file
File.open("output.txt", "a") do |file|
  file.puts("This is appended content")
end

# Use File.write shortcut method
File.write("output.txt", "Hello, World!\n", mode: "w")

# Append write
File.write("output.txt", "Appended content\n", mode: "a")

# Formatted write
data = { name: "Alice", age: 25, city: "New York" }
File.open("user.txt", "w") do |file|
  file.puts "User information:"
  data.each { |key, value| file.puts "#{key}: #{value}" }
end

Advanced Write Operations

ruby
# Write binary file
image_data = File.read("input.jpg", mode: "rb")
File.open("output.jpg", "wb") do |file|
  file.write(image_data)
end

# Use print method to write
File.open("output.txt", "w") do |file|
  file.print("Hello")
  file.print(" World!")  # No newline added
  file.puts  # Add newline
end

# Write array content
lines = ["First line", "Second line", "Third line"]
File.open("output.txt", "w") do |file|
  lines.each { |line| file.puts(line) }
end

# Use << operator
File.open("output.txt", "w") do |file|
  file << "Hello, World!\n"
  file << "This is the second line\n"
end

# Write hash data
config = {
  database: { host: "localhost", port: 5432 },
  logging: { level: "info", file: "app.log" }
}

File.open("config.txt", "w") do |file|
  config.each do |section, settings|
    file.puts "[#{section}]"
    settings.each { |key, value| file.puts "#{key} = #{value}" }
    file.puts
  end
end

📁 File System Operations

File and Directory Check

ruby
# Check if file exists
puts File.exist?("example.txt")  # true/false
puts File.exist?("nonexistent.txt")  # false

# Check if it's a file
puts File.file?("example.txt")  # true/false

# Check if it's a directory
puts File.directory?("docs")  # true/false

# Check if file is readable
puts File.readable?("example.txt")  # true/false

# Check if file is writable
puts File.writable?("example.txt")  # true/false

# Check if file is executable
puts File.executable?("script.rb")  # true/false

# Get file size
puts File.size("example.txt")  # bytes

# Check if file is empty
puts File.zero?("empty.txt")  # true/false

# Get file modification time
puts File.mtime("example.txt")  # 2023-12-25 14:30:45 +0800

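# Get file change time (metadata/inode change on Unix, creation time on Windows; use File.birthtime for creation time where supported)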
puts File.ctime("example.txt")  # 2023-12-25 14:30:45 +0800

# Get file access time
puts File.atime("example.txt")  # 2023-12-25 14:30:45 +0800

Directory Operations

ruby
require 'fileutils'

# List directory contents
puts Dir.entries(".")  # All files and directories in current directory

# Get current working directory
puts Dir.pwd  # D:/Workspace/Coding/...

# Change current working directory
Dir.chdir("docs") do
  puts Dir.pwd  # Changed directory
  # Perform operations in this directory
end

# Create directory
Dir.mkdir("new_directory")

# Create multi-level directories
FileUtils.mkdir_p("parent/child/grandchild")

# Delete empty directory
Dir.rmdir("empty_directory")

# Delete directory and its contents
FileUtils.rm_rf("directory_to_delete")

# Copy directory
FileUtils.cp_r("source_directory", "destination_directory")

# Move directory
FileUtils.mv("old_directory", "new_directory")

File Operations

ruby
require 'fileutils'

# Copy file
FileUtils.cp("source.txt", "destination.txt")

# Copy multiple files
FileUtils.cp(["file1.txt", "file2.txt"], "destination_directory")

# Move file
FileUtils.mv("old_name.txt", "new_name.txt")

# Delete file
FileUtils.rm("unwanted.txt")

# Delete multiple files
FileUtils.rm(["file1.txt", "file2.txt"])

# Safe delete (ignore non-existent files)
FileUtils.rm_f("maybe_exists.txt")

# Recursively force-delete a file or directory tree (missing paths and errors are ignored)
FileUtils.rm_rf("directory_or_file")

# Create hard link
FileUtils.ln("source.txt", "hard_link.txt")

# Create symbolic link
FileUtils.ln_s("source.txt", "soft_link.txt")

# Change file permissions
FileUtils.chmod(0644, "file.txt")

# Change file owner (requires administrator permissions)
# FileUtils.chown("user", "group", "file.txt")

# Change file timestamp
FileUtils.touch("file.txt")

🔍 File Search and Traversal

Glob Pattern Matching

ruby
# Find all .rb files
ruby_files = Dir.glob("*.rb")
puts ruby_files.inspect

# Recursively find all .rb files
all_ruby_files = Dir.glob("**/*.rb")
puts all_ruby_files.inspect

# Find files in specific directory
docs_files = Dir.glob("docs/*")
puts docs_files.inspect

# Use multiple patterns
mixed_files = Dir.glob(["*.rb", "*.md", "config/*.yml"])
puts mixed_files.inspect

# Find hidden files
hidden_files = Dir.glob(".*")
puts hidden_files.inspect

# Find files with specific extension
image_files = Dir.glob("*.{jpg,png,gif}")
puts image_files.inspect

# Find .txt files whose names start with a digit
numbered_files = Dir.glob("[0-9]*.txt")
puts numbered_files.inspect

Directory Traversal

ruby
# Recursively traverse directory.
# Prints "Directory: <path>" for every directory below +dir+ (and descends
# into it) and "File: <path>" for every other entry. The "." and ".."
# entries are skipped. Paths are built with File.join from +dir+.
def traverse_directory(dir)
  Dir.foreach(dir) do |name|
    next if %w[. ..].include?(name)

    full_path = File.join(dir, name)
    unless File.directory?(full_path)
      puts "File: #{full_path}"
      next
    end

    puts "Directory: #{full_path}"
    traverse_directory(full_path) # depth-first: list the subtree before the next sibling
  end
end

# traverse_directory(".")

# Use Find module for traversal
require 'find'

Find.find(".") do |path|
  if FileTest.directory?(path)
    puts "Directory: #{path}"
  else
    puts "File: #{path}"
  end
end

# Traverse by depth.
# Prints the tree below +dir+ using entry names only, indenting each level by
# two spaces per +depth+. Directories are labelled "Directory:" and descended
# into with depth + 1; everything else is labelled "File:".
def traverse_by_depth(dir, depth = 0)
  prefix = "  " * depth
  Dir.foreach(dir) do |name|
    next if name == "." || name == ".."

    child = File.join(dir, name)
    descend = File.directory?(child)
    puts "#{prefix}#{descend ? 'Directory' : 'File'}: #{name}"
    traverse_by_depth(child, depth + 1) if descend
  end
end

# traverse_by_depth(".")

🎯 Practical File Processing Examples

Configuration File Handler

ruby
# Reads and writes flat "key = value" configuration files.
class ConfigFileHandler
  # Parses +filename+ into a Hash of String keys to String values.
  # Blank lines, lines starting with "#", and lines without "=" are ignored.
  # Only the first "=" separates key from value; both sides are stripped.
  def self.read_config(filename)
    File.foreach(filename).each_with_object({}) do |raw_line, settings|
      entry = raw_line.strip
      next if entry.empty? || entry.start_with?("#")
      next unless entry.include?("=")

      name, setting = entry.split("=", 2).map(&:strip)
      settings[name] = setting
    end
  end

  # Overwrites +filename+ with one "key = value" line per entry of +config+,
  # each followed by a blank line. When +comments+ has a truthy value for a
  # key, it is written as a "# ..." line directly above that entry.
  def self.write_config(filename, config, comments = {})
    File.open(filename, "w") do |out|
      config.each do |name, setting|
        note = comments[name]
        out.puts "# #{note}" if note
        out.puts "#{name} = #{setting}"
        out.puts
      end
    end
  end

  # Re-reads +filename+, merges +updates+ over the parsed values, and writes
  # the result back. Comments from the existing file are not carried over,
  # because write_config is called without a comments hash.
  def self.update_config(filename, updates)
    merged = read_config(filename).merge!(updates)
    write_config(filename, merged)
  end
end

# Using configuration file handler
# config = ConfigFileHandler.read_config("app.conf")
# puts config.inspect

# new_config = {
#   "database_host" => "localhost",
#   "database_port" => "5432",
#   "log_level" => "info"
# }
# 
# comments = {
#   "database_host" => "Database host address",
#   "database_port" => "Database port",
#   "log_level" => "Log level"
# }
# 
# ConfigFileHandler.write_config("app.conf", new_config, comments)

Log File Handler

ruby
# Appends to, queries, clears and rotates a single plain-text log file.
# Entries written by #log have the form "[YYYY-MM-DD HH:MM:SS] LEVEL: message".
class LogFileHandler
  # log_file - path of the log file; it is created on first write.
  def initialize(log_file)
    @log_file = log_file
  end

  # Write log.
  # Appends one timestamped entry. +level+ is upcased before being written.
  # Returns the number of bytes written.
  def log(level, message)
    timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    log_entry = "[#{timestamp}] #{level.upcase}: #{message}\n"

    File.open(@log_file, "a") { |file| file.write(log_entry) }
  end

  # Read recent log entries.
  # Returns the last +count+ lines (newlines included), or [] when the file
  # does not exist.
  def recent_entries(count = 10)
    return [] unless File.exist?(@log_file)

    File.readlines(@log_file).last(count)
  end

  # Filter logs by level.
  # Returns the lines whose level equals +level+ (case-insensitive on the
  # argument). A line matches when it has the "[timestamp] LEVEL: " prefix
  # produced by #log, or when it contains the bracketed form "[LEVEL]".
  # Returns [] when the file does not exist.
  def filter_by_level(level)
    return [] unless File.exist?(@log_file)

    tag = level.to_s.upcase
    # Anchor on the prefix #log writes, so a level name that merely appears
    # inside a message does not count as a match.
    own_format = /\A\[[^\]]*\] #{Regexp.escape(tag)}: /
    bracketed = "[#{tag}]"

    File.foreach(@log_file).select do |line|
      line.match?(own_format) || line.include?(bracketed)
    end
  end

  # Clear log file.
  # Empties the file, creating it if it does not exist. Returns 0.
  def clear
    File.write(@log_file, "")
  end

  # Get log file size in bytes, or nil when the file does not exist.
  def size
    File.size(@log_file) if File.exist?(@log_file)
  end

  # Rotate log files.
  # Does nothing when the file is missing or smaller than +max_size+ bytes.
  # Otherwise renames it to "<log_file>.<YYYYmmdd_HHMMSS>" and starts a new
  # file containing a "# New log file" header line.
  def rotate(max_size = 1024 * 1024) # 1MB
    return unless File.exist?(@log_file)
    return if File.size(@log_file) < max_size

    timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
    File.rename(@log_file, "#{@log_file}.#{timestamp}")

    File.write(@log_file, "# New log file\n")
  end
end

# Using log file handler
# logger = LogFileHandler.new("app.log")
# logger.log("info", "Application started")
# logger.log("error", "An error occurred")
# logger.log("debug", "Debug information")
# 
# puts "Recent logs:"
# logger.recent_entries(5).each { |entry| puts entry }
# 
# puts "Error logs:"
# logger.filter_by_level("error").each { |entry| puts entry }

CSV File Handler

ruby
require 'csv'

# Convenience wrappers around the standard CSV library for header-based files.
class CSVHandler
  # Read CSV file.
  # Returns an Array of Hashes, one per data row, keyed by the header row.
  def self.read_csv(filename)
    CSV.foreach(filename, headers: true).map(&:to_h)
  end

  # Write CSV file.
  # data    - Array of Hashes and/or Arrays.
  # headers - column names; defaults to the keys of the first row when that
  #           row is a Hash. When no headers can be determined (empty data or
  #           array rows without explicit headers) no header line is written.
  # Hash rows are written in header order; without headers their values are
  # written in the hash's own order. Array rows are written as given.
  def self.write_csv(filename, data, headers = nil)
    headers ||= data.first.keys if data.first.is_a?(Hash)

    CSV.open(filename, "w") do |csv|
      csv << headers if headers
      data.each do |row|
        csv << (row.is_a?(Hash) ? row.values_at(*(headers || row.keys)) : row)
      end
    end
  end

  # Append data to CSV file.
  # +row+ is appended as-is; no header handling is performed.
  def self.append_to_csv(filename, row)
    CSV.open(filename, "a") { |csv| csv << row }
  end

  # Filter CSV data by condition.
  # Yields each row as a Hash and returns the Hashes for which the block is
  # truthy.
  def self.filter_csv(filename, &block)
    CSV.foreach(filename, headers: true).each_with_object([]) do |row, kept|
      record = row.to_h # convert once; the same Hash is tested and collected
      kept << record if block.call(record)
    end
  end

  # CSV data statistics.
  # Returns count / unique_count over every cell of +column+ (missing cells
  # included as nil) and min / max over the non-nil cells. Cells are the raw
  # CSV strings, so min and max use string ordering ("10" < "9").
  def self.csv_stats(filename, column)
    values = CSV.foreach(filename, headers: true).map { |row| row[column] }
    present = values.compact # nil cannot be compared with strings in min/max

    {
      count: values.length,
      unique_count: values.uniq.length,
      min: present.min,
      max: present.max
    }
  end
end

# Using CSV handler
# data = [
#   { "Name" => "Alice", "Age" => "25", "City" => "New York" },
#   { "Name" => "Bob", "Age" => "30", "City" => "Los Angeles" },
#   { "Name" => "Charlie", "Age" => "28", "City" => "Chicago" }
# ]
# 
# CSVHandler.write_csv("users.csv", data)
# 
# read_data = CSVHandler.read_csv("users.csv")
# puts read_data.inspect
# 
# filtered = CSVHandler.filter_csv("users.csv") { |row| row["Age"].to_i > 25 }
# puts filtered.inspect

📊 File Processing Performance Optimization

Large File Processing

ruby
# Process large file line by line.
# Streams +filename+ one line at a time, hands each line (newline included)
# to process_line, and returns the number of lines read.
def process_large_file(filename)
  processed = 0
  File.foreach(filename) do |row|
    processed += 1
    process_line(row)
  end
  processed
end

# Process single line data: prints the line, without its trailing newline,
# prefixed by "Processing line: ".
def process_line(line)
  text = line.chomp
  puts "Processing line: #{text}"
end

# Use buffer to process large file.
# Reads +filename+ in pieces of at most +buffer_size+ bytes and passes each
# piece to process_buffer until the read returns nil at end of file.
def process_large_file_with_buffer(filename, buffer_size = 1024)
  File.open(filename, "r") do |io|
    until (piece = io.read(buffer_size)).nil?
      process_buffer(piece)
    end
  end
end

# Process buffer data: reports the size of +buffer+ in bytes.
# bytesize is used rather than length so the figure is correct for multibyte
# text as well as for binary chunks.
def process_buffer(buffer)
  puts "Processing buffer: #{buffer.bytesize} bytes"
end

# Process file in chunks.
# Streams +filename+ and passes its lines (with trailing newlines removed) to
# process_chunk in arrays of up to +chunk_size+ lines; the final array may be
# shorter. Nothing is passed for an empty file.
def process_file_in_chunks(filename, chunk_size = 1000)
  # A size below 1 flushes after every line, so it is treated as 1 here.
  slice_size = [chunk_size, 1].max
  File.foreach(filename).each_slice(slice_size) do |raw_lines|
    process_chunk(raw_lines.map(&:chomp))
  end
  nil
end

# Process data chunk: reports how many lines the chunk holds.
def process_chunk(chunk)
  line_total = chunk.length
  puts "Processing chunk: #{line_total} lines"
end

File Stream Processing

ruby
# Using IO stream processing.
# Line-by-line transformation between any two IO-like objects.
class FileStreamProcessor
  # Calls the block with every line of +input_stream+ and writes the block's
  # result to +output_stream+. The streams only need #each_line and #write.
  def self.process_stream(input_stream, output_stream, &block)
    input_stream.each_line do |line|
      output_stream.write(block.call(line))
    end
  end

  # File to file stream processing.
  # +output_file+ is opened in "w" mode and therefore truncated; do not pass
  # the same path as +input_file+.
  def self.process_file_to_file(input_file, output_file, &block)
    File.open(input_file, "r") do |input|
      File.open(output_file, "w") do |output|
        process_stream(input, output, &block)
      end
    end
  end

  # File to stdout stream processing.
  # Writes to $stdout rather than the STDOUT constant, so output follows any
  # redirection of $stdout (for example when it is captured).
  def self.process_file_to_stdout(input_file, &block)
    File.open(input_file, "r") do |input|
      process_stream(input, $stdout, &block)
    end
  end
end

# Using stream processor
# FileStreamProcessor.process_file_to_file("input.txt", "output.txt") do |line|
#   line.upcase
# end

🛡️ File Processing Security Best Practices

Safe File Operations

ruby
# Defensive wrappers around common file operations. Failures are reported
# with puts and signalled through nil / false return values, not exceptions.
class SafeFileHandler
  # Safe file reading.
  # Returns the file content, or nil when the file is missing, larger than
  # +max_size+ bytes (10MB by default), or cannot be read.
  def self.safe_read(filename, max_size = 10 * 1024 * 1024)
    return nil unless File.exist?(filename)
    return nil if File.size(filename) > max_size

    File.read(filename)
  rescue => e
    puts "Error reading file: #{e.message}"
    nil
  end

  # Safe file writing.
  # Returns true after writing +content+; returns false when the content is
  # larger than +max_size+ bytes or the write fails. The limit is compared in
  # bytes (bytesize), matching what ends up on disk.
  def self.safe_write(filename, content, max_size = 10 * 1024 * 1024)
    return false if content.bytesize > max_size

    File.write(filename, content)
    true
  rescue => e
    puts "Error writing file: #{e.message}"
    false
  end

  # Validate file path security.
  # Returns true when +path+, resolved against +base_dir+, is +base_dir+
  # itself or lies below it. The comparison is done on a directory boundary,
  # so a sibling such as "/base_evil" is not accepted for base "/base".
  # Symbolic links are not resolved; this is a purely lexical check.
  def self.safe_path?(path, base_dir = Dir.pwd)
    abs_path = File.expand_path(path, base_dir)
    base_abs_path = File.expand_path(base_dir)
    return true if abs_path == base_abs_path

    # The root directory already ends with a separator; avoid producing "//".
    prefix = base_abs_path.end_with?(File::SEPARATOR) ? base_abs_path : base_abs_path + File::SEPARATOR
    abs_path.start_with?(prefix)
  end

  # Safe file deletion.
  # Returns true when the file was deleted, false when it does not exist or
  # the deletion fails.
  def self.safe_delete(filename)
    return false unless File.exist?(filename)

    File.delete(filename)
    true
  rescue => e
    puts "Error deleting file: #{e.message}"
    false
  end

  # Backup file.
  # Copies +filename+ to "<filename>.backup.<unix time>" and returns the
  # backup's name; returns false when the file is missing or the copy fails.
  def self.backup_file(filename)
    return false unless File.exist?(filename)

    backup_name = "#{filename}.backup.#{Time.now.to_i}"
    FileUtils.cp(filename, backup_name)
    backup_name
  rescue => e
    puts "Error backing up file: #{e.message}"
    false
  end
end

# Using safe file handler
# if SafeFileHandler.safe_path?("data.txt", "/safe/directory")
#   content = SafeFileHandler.safe_read("data.txt")
#   puts content
# end

# success = SafeFileHandler.safe_write("output.txt", "Hello, World!")
# puts "Write successful: #{success}"

# backup = SafeFileHandler.backup_file("important.txt")
# puts "Backup file: #{backup}"

File Permissions and Ownership

ruby
# Helpers for inspecting and changing file permissions and metadata.
class FilePermissionManager
  # Check file permissions.
  # Returns a Hash of booleans: readable, writable, executable, owned,
  # owned_by_group, each taken from the matching File predicate.
  def self.check_permissions(filename)
    predicates = {
      readable: :readable?,
      writable: :writable?,
      executable: :executable?,
      owned: :owned?,
      owned_by_group: :grpowned?
    }
    predicates.transform_values { |predicate| File.public_send(predicate, filename) }
  end

  # Set file permissions (Unix/Linux systems).
  # Passes +permissions+ to File.chmod. On failure the error is printed and
  # not re-raised.
  def self.set_permissions(filename, permissions)
    File.chmod(permissions, filename)
  rescue => e
    puts "Error setting permissions: #{e.message}"
  end

  # Get file detailed information.
  # Returns a Hash built from File.stat: size, mtime, ctime, atime, uid, gid,
  # mode (as an octal string) and the readable / writable / executable flags.
  # Returns nil, after printing the error, when stat fails.
  def self.file_info(filename)
    stat = File.stat(filename)
    info = {}
    %i[size mtime ctime atime uid gid].each { |field| info[field] = stat.public_send(field) }
    info[:mode] = format("%o", stat.mode)
    info[:readable] = stat.readable?
    info[:writable] = stat.writable?
    info[:executable] = stat.executable?
    info
  rescue => e
    puts "Error getting file info: #{e.message}"
    nil
  end
end

# Using file permission manager
# permissions = FilePermissionManager.check_permissions("example.txt")
# puts permissions.inspect

# FilePermissionManager.set_permissions("example.txt", 0644)

# info = FilePermissionManager.file_info("example.txt")
# puts info.inspect

🎯 File Processing Best Practices

1. Resource Management

ruby
# Always use block syntax to ensure files are properly closed
# Recommended
File.open("example.txt", "r") do |file|
  content = file.read
  # Process content
end  # File automatically closed

# Not recommended
file = File.open("example.txt", "r")
content = file.read
# The file is never closed here (file.close was forgotten), so the handle stays open until it is garbage-collected
# file.close

# Use ensure for resource cleanup.
# Opens "example.txt" without a block, returns its content, and closes the
# handle in an ensure clause so it is released even when reading raises.
def manual_file_handling
  handle = File.open("example.txt", "r")
  handle.read
ensure
  # handle is nil when File.open itself raised; there is nothing to close then.
  handle&.close
end

2. Error Handling

ruby
# Handle file operation exceptions.
# Reads +filename+ and passes the content to process_content. A missing file,
# a permission failure, and any other StandardError are each reported with
# their own message and are not re-raised.
def robust_file_operation(filename)
  text = File.open(filename, "r", &:read)
  process_content(text)
rescue Errno::ENOENT
  puts "File #{filename} does not exist"
rescue Errno::EACCES
  puts "No permission to access file #{filename}"
rescue => e
  puts "Error processing file: #{e.message}"
end

# Check file existence.
# Returns the content of +filename+, or nil after printing a message when the
# file does not exist or reading it raises.
def safe_file_read(filename)
  if File.exist?(filename)
    File.read(filename)
  else
    puts "File #{filename} does not exist"
    nil
  end
rescue => e
  puts "Error reading file: #{e.message}"
  nil
end

# Handle encoding issues.
# Reads +filename+ with +encoding+ as its external encoding and returns the
# content. Returns nil after printing a message when the bytes are not valid
# in that encoding, or when reading fails for any other reason.
def read_with_encoding(filename, encoding = "UTF-8")
  content = File.open(filename, "r:#{encoding}", &:read)
  # Reading without transcoding never raises on malformed bytes; it only tags
  # the string with the encoding. Validity therefore has to be checked here.
  return content if content.valid_encoding?

  puts "File encoding is incorrect"
  nil
rescue Encoding::InvalidByteSequenceError
  # Raised only when transcoding is in effect, e.g. an "ext:int" encoding
  # pair or a configured default internal encoding.
  puts "File encoding is incorrect"
rescue => e
  puts "Error reading file: #{e.message}"
end

3. Performance Optimization

ruby
# For large files, use streaming processing.
# Hands each line of +filename+ to process_line as it is read, so the whole
# file is never held in memory at once.
def process_large_file_efficiently(filename)
  File.open(filename, "r") do |stream|
    stream.each_line(&method(:process_line))
  end
end

# Batch file operations.
# Runs process_file on every entry of +filenames+. A failure on one file is
# printed and does not stop the remaining files from being processed.
def batch_file_operations(filenames)
  filenames.each do |name|
    process_file(name)
  rescue => e
    puts "Error processing file #{name}: #{e.message}"
  end
end

# Use temporary files.
require 'tempfile' # Tempfile is not loaded by default

# Writes +data+ to a temporary file, passes the file's path to process_file,
# and returns that call's result. The temporary file is closed and deleted
# afterwards, including when writing or processing raises.
def process_with_temp_file(data)
  temp_file = Tempfile.new("processing")
  begin
    temp_file.write(data)
    temp_file.close # flush to disk before the path is handed on
    process_file(temp_file.path)
  ensure
    temp_file.close  # no-op when already closed; releases the handle if write raised
    temp_file.unlink # delete temporary file
  end
end

4. Real Application Scenarios

ruby
# Log analyzer
require 'set' # Set is only autoloaded from Ruby 3.2 onwards

# Scans a log file once and collects simple statistics about it.
class LogAnalyzer
  # Levels recognised by substring, in priority order; any other line is INFO.
  KEYWORD_LEVELS = %w[ERROR WARNING].freeze
  # Which statistics counter each level increments.
  LEVEL_COUNTERS = {
    "ERROR" => :error_count,
    "WARNING" => :warning_count,
    "INFO" => :info_count
  }.freeze
  # "YYYY-MM-DD HH:MM:SS" (or with a "T" separator) anywhere in the line.
  TIMESTAMP_PATTERN = /(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})/.freeze
  # Simplified dotted-quad match; octet ranges are not validated.
  IP_PATTERN = /\d+\.\d+\.\d+\.\d+/.freeze
  private_constant :KEYWORD_LEVELS, :LEVEL_COUNTERS, :TIMESTAMP_PATTERN, :IP_PATTERN

  # log_file - path of the log file to analyse.
  def initialize(log_file)
    @log_file = log_file
  end

  # Reads the log line by line and returns a Hash with:
  #   total_lines, error_count, warning_count, info_count - Integer counters
  #   unique_ips       - Set of the first IP-like token of each line
  #   requests_by_hour - Hash of hour (0-23) => number of lines whose own
  #                      timestamp falls in that hour; lines without a
  #                      parsable timestamp are not counted here
  def analyze
    stats = {
      total_lines: 0,
      error_count: 0,
      warning_count: 0,
      info_count: 0,
      unique_ips: Set.new,
      requests_by_hour: Hash.new(0)
    }

    File.foreach(@log_file) do |line|
      stats[:total_lines] += 1
      entry = parse_log_line(line)

      stats[LEVEL_COUNTERS.fetch(entry[:level])] += 1
      stats[:unique_ips] << entry[:ip] if entry[:ip]
      stats[:requests_by_hour][entry[:timestamp].hour] += 1 if entry[:timestamp]
    end

    stats
  end

  private

  # Simplified log parsing; real log formats may need stricter expressions.
  # Returns a Hash with :timestamp (Time or nil), :level, :ip (String or nil)
  # and :message (the raw line).
  def parse_log_line(line)
    {
      timestamp: parse_timestamp(line),
      level: detect_level(line),
      ip: line[IP_PATTERN],
      message: line
    }
  end

  # Returns the first keyword level contained in the line, else "INFO".
  def detect_level(line)
    KEYWORD_LEVELS.find { |level| line.include?(level) } || "INFO"
  end

  # Returns the line's timestamp as a local Time, or nil when the line has
  # none or its fields are out of range.
  def parse_timestamp(line)
    match = TIMESTAMP_PATTERN.match(line)
    return nil unless match

    Time.new(*match.captures.map(&:to_i))
  rescue ArgumentError
    nil
  end
end

# Data importer
# Imports the rows of a header-based CSV file into a database-like object.
class DataImporter
  # Reads +csv_file+ row by row, converts each row and passes it to
  # +database+.insert (any object responding to #insert works).
  # A row whose conversion or insert raises is reported, counted as failed,
  # and does not stop the import.
  # Returns { imported:, failed:, total: } with Integer counts.
  def self.import_csv_to_database(csv_file, database)
    imported_count = 0
    failed_count = 0

    CSV.foreach(csv_file, headers: true) do |row|
      database.insert(convert_row_to_record(row))
      imported_count += 1
    rescue => e
      puts "Failed to import row: #{e.message}"
      failed_count += 1
    end

    {
      imported: imported_count,
      failed: failed_count,
      total: imported_count + failed_count
    }
  end

  # Convert data as needed; currently the row is turned into a plain Hash.
  def self.convert_row_to_record(row)
    row.to_h
  end
  # A bare `private` has no effect on `def self.` methods, so the helper is
  # hidden explicitly.
  private_class_method :convert_row_to_record
end

📚 Next Steps

After mastering Ruby file processing and I/O operations, continue learning:

Continue your Ruby learning journey!

Content is for learning and research only.