Skip to content

C# String Processing

This chapter covers string processing in C# in detail, including string creation, common operations, formatting, and regular expressions, to help you master the main techniques for text manipulation.

String Basics

String Characteristics

csharp
// Strings are immutable: "modifying" one always produces a new string object
string str1 = "Hello";
string str2 = str1;  // str2 now refers to the same "Hello" object as str1
str1 += " World";  // Creates new string, str1 points to new object

Console.WriteLine($"str1: {str1}");  // "Hello World"
Console.WriteLine($"str2: {str2}");  // "Hello" (unchanged)

// Strings are reference types, but == compares their contents
string a = "test";
string b = "test";
Console.WriteLine($"a == b: {a == b}");           // True (value equality)
Console.WriteLine($"ReferenceEquals(a, b): {ReferenceEquals(a, b)}");  // True (identical literals are interned, so both refer to one object)

// String length
string message = "Hello, C#!";
Console.WriteLine($"String length: {message.Length}");  // 10

// Access characters by zero-based index
Console.WriteLine($"First character: {message[0]}");      // 'H'
Console.WriteLine($"Last character: {message[message.Length - 1]}");  // '!'

String Creation Methods

csharp
// 1. String literals
string literal = "Hello World";

// 2. Verbatim strings (@ prefix): backslashes are literal, and line breaks are kept
string path = @"C:\Users\Name\Documents\file.txt";
string multiline = @"This is a
multiline string
example";

// 3. String interpolation: expressions in {} are evaluated and formatted into the text
string name = "Alice";
int age = 25;
string interpolated = $"Name: {name}, Age: {age}";  // "Name: Alice, Age: 25"

// 4. Create from character array
char[] chars = { 'H', 'e', 'l', 'l', 'o' };
string fromChars = new string(chars);  // "Hello"

// 5. Create from repeated character
string repeated = new string('A', 5);  // "AAAAA"

// 6. Empty and whitespace strings
string empty = string.Empty;  // Zero-length string, equivalent to ""
string whitespace = "   ";  // Not empty: contains three space characters

String Operations

Basic Operations

csharp
string text = "Hello World";

// Concatenation
string greeting = "Hello" + " " + "World";
string concat2 = string.Concat("Hello", " ", "World");

// Comparison (== and Equals(string) are ordinal and case-sensitive)
bool isEqual = text == "Hello World";  // True
bool equals = text.Equals("Hello World");  // True
bool equalsIgnoreCase = text.Equals("hello world", StringComparison.OrdinalIgnoreCase);  // True

// Substring(startIndex, length)
string sub = text.Substring(6, 5);  // "World"

// Trim
string padded = "  Hello World  ";
string trimmed = padded.Trim();        // "Hello World"
string trimStart = padded.TrimStart();    // "Hello World  "
string trimEnd = padded.TrimEnd();      // "  Hello World"

// Replace
string replaced = text.Replace("World", "C#");  // "Hello C#"

// Upper and lower case (these overloads use the current culture's casing rules)
string upper = text.ToUpper();      // "HELLO WORLD"
string lower = text.ToLower();      // "hello world"

Search Operations

csharp
string text = "The quick brown fox jumps over the lazy dog";

// Contains
bool containsFox = text.Contains("fox");           // True
bool containsCat = text.Contains("cat");           // False

// StartsWith and EndsWith
bool startsWithThe = text.StartsWith("The");       // True
bool endsWithDog = text.EndsWith("dog");           // True

// IndexOf / LastIndexOf return the zero-based position, or -1 when nothing is found
int foxIndex = text.IndexOf("fox");               // 16
int dogIndex = text.LastIndexOf("dog");           // 40

// IndexOf with start position
// The search is case-sensitive by default, so "The" would not match the lowercase
// "the" at index 31 (the call would return -1). Ignore case explicitly to find it.
int secondThe = text.IndexOf("The", 4, StringComparison.OrdinalIgnoreCase);  // 31

// Any (LINQ): a string is a sequence of chars, so LINQ operators apply to it
bool hasVowel = text.Any(c => "AEIOUaeiou".Contains(c));  // True

Split and Join

csharp
// Split string
string csv = "Apple,Banana,Orange,Grape";
string[] fruits = csv.Split(',');  // ["Apple", "Banana", "Orange", "Grape"]

// Split with options: drop the empty entries produced by consecutive delimiters
string data = "Apple,,Banana,,Orange";
string[] parts = data.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);  // ["Apple", "Banana", "Orange"]

// Split by multiple delimiters
string sentence = "Hello World, how are you?";
string[] words = sentence.Split(new[] { ' ', ',', '?' }, StringSplitOptions.RemoveEmptyEntries);  // ["Hello", "World", "how", "are", "you"]

// Join strings
string[] items = { "Apple", "Banana", "Orange" };
string joined = string.Join(", ", items);  // "Apple, Banana, Orange"

// Join values of any type: each argument is converted to its string form
string numbers = string.Join("-", 1, 2, 3, 4, 5);  // "1-2-3-4-5"

String Formatting

String.Format

csharp
// Basic formatting: {0}, {1}, ... are replaced by the arguments in order
string formatted = string.Format("Hello {0}, you are {1} years old", "Alice", 25);

// Number formatting (output depends on the current culture; en-US results shown)
string number = string.Format("Price: {0:C}", 123.45);        // "Price: $123.45"
string percent = string.Format("Progress: {0:P}", 0.75);        // "Progress: 75.00%"

// Date formatting
DateTime now = DateTime.Now;
string date = string.Format("Today is {0:yyyy-MM-dd}", now);

// Custom formatting: D5 pads an integer with leading zeros to five digits
string custom = string.Format("ID: {0:D5}", 42);  // "ID: 00042"

Composite Formatting

csharp
// Multiple placeholders; {3:yyyy-MM-dd HH:mm} applies a date/time format to the fourth argument
string message = string.Format(
    "User {0} ({1}) has {2} messages. Last login: {3:yyyy-MM-dd HH:mm}",
    "Alice", "alice@example.com", 5, DateTime.Now
);

Format Providers

csharp
// Culture-specific formatting: pass the culture explicitly instead of relying on the current one
CultureInfo usCulture = new CultureInfo("en-US");
CultureInfo frenchCulture = new CultureInfo("fr-FR");

decimal amount = 1234.56m;
string usPrice = amount.ToString("C", usCulture);      // "$1,234.56"
string frenchPrice = amount.ToString("C", frenchCulture);  // "1 234,56 €" (NOTE: the separators are typically non-breaking spaces, not U+0020 - verify on the target runtime)

// Custom format provider: properties that are not set keep the culture-independent defaults
NumberFormatInfo customFormat = new NumberFormatInfo
{
    CurrencySymbol = "€",
    CurrencyDecimalDigits = 2
};
string customPrice = amount.ToString("C", customFormat);  // "€1,234.56"

StringBuilder

Basic StringBuilder Usage

csharp
// Create StringBuilder
StringBuilder sb = new StringBuilder();

// Append strings
sb.Append("Hello");
sb.Append(" ");
sb.Append("World");

// Append line: adds the text followed by a line terminator.
// No terminator was appended after "World", so this text continues on the same line.
sb.AppendLine("This is a new line");

// Append format
sb.AppendFormat("The answer is {0}", 42);

// Get final string
string result = sb.ToString();  // "Hello WorldThis is a new line" + Environment.NewLine + "The answer is 42"

StringBuilder Performance

csharp
// Efficient string building: one growable buffer instead of repeated concatenation
public string BuildLargeString(int iterations)
{
    // The capacity is only an initial size hint; the builder grows when it is exceeded
    var builder = new StringBuilder(iterations * 10);

    int lineNumber = 0;
    while (lineNumber < iterations)
    {
        builder.AppendLine($"Line {lineNumber}: Some content here");
        lineNumber++;
    }

    return builder.ToString();
}

// StringBuilder with capacity: the argument is the suggested starting size in characters
StringBuilder sb2 = new StringBuilder(1000);  // Pre-allocate capacity

Regular Expressions

Basic Regex Patterns

csharp
using System.Text.RegularExpressions;

// Simple pattern matching: a literal '$', one or more digits, a '.', then exactly two digits
string text = "The price is $123.45 for item ABC123";
Regex priceRegex = new Regex(@"\$\d+\.\d{2}");
Match priceMatch = priceRegex.Match(text);

if (priceMatch.Success)
{
    Console.WriteLine($"Price found: {priceMatch.Value}");  // "Price found: $123.45"
}

// Email validation (a simple shape check: local part, '@', domain, and a final label of 2+ letters)
string email = "user@example.com";
Regex emailRegex = new Regex(@"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$");
bool isValidEmail = emailRegex.IsMatch(email);  // True

// Phone number extraction: the three capture groups hold the 3-3-4 digit parts
string phoneNumber = "Call me at (555) 123-4567";
Regex phoneRegex = new Regex(@"\(?(\d{3})\)?[-\s]?(\d{3})[-\s]?(\d{4})");
Match phoneMatch = phoneRegex.Match(phoneNumber);

if (phoneMatch.Success)
{
    Console.WriteLine($"Area: {phoneMatch.Groups[1].Value}");  // "Area: 555"
    Console.WriteLine($"Exchange: {phoneMatch.Groups[2].Value}");  // "Exchange: 123"
    Console.WriteLine($"Number: {phoneMatch.Groups[3].Value}");  // "Number: 4567"
}

Regex Groups and Replacement

csharp
// Named groups: (?<name>...) lets each captured part be read back by name
string logEntry = "2023-12-25 10:30:45 [ERROR] Database connection failed";
Regex logRegex = new Regex(@"(?<date>\d{4}-\d{2}-\d{2})\s+(?<time>\d{2}:\d{2}:\d{2})\s+\[(?<level>\w+)\]\s+(?<message>.+)");
Match logMatch = logRegex.Match(logEntry);

if (logMatch.Success)
{
    Console.WriteLine($"Date: {logMatch.Groups["date"].Value}");  // "Date: 2023-12-25"
    Console.WriteLine($"Time: {logMatch.Groups["time"].Value}");  // "Time: 10:30:45"
    Console.WriteLine($"Level: {logMatch.Groups["level"].Value}");  // "Level: ERROR"
    Console.WriteLine($"Message: {logMatch.Groups["message"].Value}");  // "Message: Database connection failed"
}

// Replacement: every run of digits, whatever its length, becomes the fixed text "***"
string input = "User123 logged in at 2023-12-25";
string output = Regex.Replace(input, @"\d+", "***");
Console.WriteLine(output);  // "User*** logged in at ***-***-***"

String Extensions

Custom Extension Methods

csharp
// Convenience extension methods for common string clean-up and inspection tasks.
public static class StringExtensions
{
    // Truncate with ellipsis.
    // Returns text unchanged when it is null, empty, or already fits in maxLength.
    // Otherwise returns at most maxLength characters, ending in "..." when there is
    // room for it. Throws ArgumentOutOfRangeException when a non-empty text is
    // combined with a negative maxLength.
    public static string Truncate(this string text, int maxLength)
    {
        const string Ellipsis = "...";

        if (string.IsNullOrEmpty(text) || text.Length <= maxLength)
            return text;

        if (maxLength < 0)
            throw new ArgumentOutOfRangeException(nameof(maxLength), "Maximum length cannot be negative.");

        // Too short to hold the ellipsis: hard-cut instead of handing Substring a negative length
        if (maxLength < Ellipsis.Length)
            return text.Substring(0, maxLength);

        return text.Substring(0, maxLength - Ellipsis.Length) + Ellipsis;
    }

    // Capitalize words.
    // Uses the current culture's casing rules. The text is lower-cased first because
    // ToTitleCase leaves words that are entirely upper case (treated as acronyms) untouched.
    public static string TitleCase(this string text)
    {
        if (string.IsNullOrEmpty(text))
            return text;

        TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;
        return textInfo.ToTitleCase(textInfo.ToLower(text));
    }

    // Remove HTML tags.
    // Deletes everything between '<' and the next '>'. This is a simple pattern-based
    // strip, not an HTML parser: entities such as &amp; are left as they are.
    public static string StripHtml(this string html)
    {
        if (string.IsNullOrEmpty(html))
            return html;

        return Regex.Replace(html, @"<[^>]*>", string.Empty);
    }

    // Is numeric.
    // True when the text parses as a decimal under the current culture, so the accepted
    // decimal separator follows the user's regional settings. Null yields false.
    public static bool IsNumeric(this string text)
    {
        return decimal.TryParse(text, out _);
    }

    // Count words.
    // A word is any run of characters separated by spaces, tabs, or line breaks;
    // punctuation attached to a word counts as part of it.
    public static int WordCount(this string text)
    {
        if (string.IsNullOrWhiteSpace(text))
            return 0;

        return text.Split(new[] { ' ', '\t', '\n', '\r' },
                      StringSplitOptions.RemoveEmptyEntries).Length;
    }
}

Using Extensions

csharp
// Usage of extension methods: they are called as if they were instance methods of string
string longText = "This is a very long text that needs to be truncated";
string truncated = longText.Truncate(20);  // "This is a very lo..." (17 characters + "...")

string title = "the quick brown fox";
string titleCase = title.TitleCase();  // "The Quick Brown Fox"

string html = "<p>This is <b>bold</b> text</p>";
string cleanText = html.StripHtml();  // "This is bold text"

string number = "123.45";
bool isNumber = number.IsNumeric();  // True (when the current culture uses '.' as the decimal separator)

string sentence = "Hello world, how are you today?";
int wordCount = sentence.WordCount();  // 6

Advanced String Processing

String Interpolation Advanced

csharp
// Format strings in interpolation: {value,alignment:format}
// A negative alignment left-aligns within the width; a positive one right-aligns
string name = "Alice";
decimal balance = 1234.56m;
DateTime date = DateTime.Now;

string formatted = $"{name,-10} | {balance,15:C} | {date:yyyy-MM-dd}";
// "Alice      |       $1,234.56 | 2023-12-25"  (en-US culture; the date shown is an example)

// Conditional interpolation
int score = 85;
string grade = score >= 90 ? "A" :
              score >= 80 ? "B" :
              score >= 70 ? "C" :
              score >= 60 ? "D" : "F";

string result = $"Score: {score}, Grade: {grade}";  // "Score: 85, Grade: B"

Raw String Literals (C# 11.0+)

csharp
// Raw string literals: delimited by at least three double quotes; quotes and backslashes need no escaping
string json1 = """{"name": "Alice", "age": 25}""";
// Multi-line form: the indentation of the closing """ is removed from every content line
string json2 = """
    {
        "name": "Alice",
        "age": 25,
        "active": true
    }
    """;

string filePath = """C:\Users\Documents\file.txt""";  // Backslashes are literal characters

String Span (C# 7.2+)

csharp
// ReadOnlySpan for efficient processing: a read-only view over the string's characters
ReadOnlySpan<char> text = "Hello World".AsSpan();

// Efficient substring without allocation
ReadOnlySpan<char> hello = text.Slice(0, 5);  // "Hello"

// Efficient character checking
bool hasSpace = text.Contains(' ');  // True

// Pattern matching with spans
ReadOnlySpan<char> pattern = "World".AsSpan();
bool endsWithWorld = text.EndsWith(pattern);  // True

Practical Examples

Text File Processor

csharp
// Reads a text file and returns its meaningful lines in normalized form.
public class TextFileProcessor
{
    // Loads the file, skips blank lines and lines whose first non-whitespace
    // character is '#', and returns the remaining lines after normalization.
    public static List<string> ProcessFile(string filePath)
    {
        var kept = new List<string>();

        foreach (string rawLine in File.ReadAllLines(filePath))
        {
            bool isBlank = string.IsNullOrWhiteSpace(rawLine);
            bool isComment = !isBlank && rawLine.TrimStart().StartsWith("#");

            if (!isBlank && !isComment)
            {
                string normalized = ProcessLine(rawLine.Trim());

                if (!string.IsNullOrEmpty(normalized))
                {
                    kept.Add(normalized);
                }
            }
        }

        return kept;
    }

    // Collapses each run of whitespace to a single space, then title-cases the
    // result using the current culture.
    private static string ProcessLine(string line)
    {
        string singleSpaced = Regex.Replace(line, @"\s+", " ");
        var casing = CultureInfo.CurrentCulture.TextInfo;

        return casing.ToTitleCase(singleSpaced.ToLower());
    }
}

Log Parser

csharp
// Parses log text whose lines look like:
//   2023-12-25 10:30:45 [ERROR] Source: Message text
public class LogParser
{
    // One parsed log line.
    public class LogEntry
    {
        public DateTime Timestamp { get; set; }
        public string Level { get; set; }
        public string Message { get; set; }
        public string Source { get; set; }
    }

    // Built once and reused, instead of constructing a new Regex on every ParseLogs call.
    private static readonly Regex LogLineRegex = new Regex(@"(?<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s+\[(?<level>\w+)\]\s+(?<source>\w+):\s+(?<message>.+)");

    // Returns one LogEntry per line of logContent that matches the expected layout;
    // text that does not match is ignored.
    // Throws ArgumentNullException when logContent is null, and FormatException when a
    // matched timestamp is not a real date/time (for example month 13).
    public static List<LogEntry> ParseLogs(string logContent)
    {
        var entries = new List<LogEntry>();

        foreach (Match match in LogLineRegex.Matches(logContent))
        {
            entries.Add(new LogEntry
            {
                // Invariant culture: the timestamp is machine-written, so it must not be
                // reinterpreted through the current culture's calendar or date order.
                Timestamp = DateTime.Parse(
                    match.Groups["timestamp"].Value,
                    System.Globalization.CultureInfo.InvariantCulture),
                Level = match.Groups["level"].Value,
                Source = match.Groups["source"].Value,
                // '.' stops at '\n' but still matches '\r', so with Windows line endings
                // the capture ends in a stray carriage return; drop it.
                Message = match.Groups["message"].Value.TrimEnd('\r')
            });
        }

        return entries;
    }

    // Returns the entries whose Level equals level, ignoring case.
    // Entries with a null Level are skipped rather than causing a NullReferenceException.
    public static List<LogEntry> FilterByLevel(List<LogEntry> entries, string level)
    {
        return entries.Where(e => string.Equals(e.Level, level, StringComparison.OrdinalIgnoreCase)).ToList();
    }
}

String Utilities Library

csharp
// General-purpose string helpers.
public static class StringUtils
{
    // One shared generator for all calls. Creating a new Random per call can produce
    // identical sequences when calls happen close together on time-seeded runtimes.
    private static readonly Random SharedRandom = new Random();

    // Random instances are not thread-safe, so access to the shared one is serialized.
    private static readonly object SharedRandomLock = new object();

    // Generate random string.
    // Returns length characters, each picked from allowedChars.
    // Throws ArgumentOutOfRangeException when length is negative and ArgumentException
    // when allowedChars is null or empty.
    // NOTE: Random is not cryptographically secure; do not use this for passwords or tokens.
    public static string RandomString(int length, string allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
    {
        if (length < 0)
            throw new ArgumentOutOfRangeException(nameof(length), "Length cannot be negative.");

        if (string.IsNullOrEmpty(allowedChars))
            throw new ArgumentException("At least one allowed character is required.", nameof(allowedChars));

        char[] result = new char[length];

        lock (SharedRandomLock)
        {
            for (int i = 0; i < length; i++)
            {
                result[i] = allowedChars[SharedRandom.Next(allowedChars.Length)];
            }
        }

        return new string(result);
    }

    // Convert to safe filename.
    // Replaces every character the current platform forbids in file names with '_',
    // then trims surrounding whitespace. The forbidden set differs between operating systems.
    // Throws ArgumentNullException when fileName is null.
    public static string ToSafeFileName(this string fileName)
    {
        if (fileName == null)
            throw new ArgumentNullException(nameof(fileName));

        foreach (char c in Path.GetInvalidFileNameChars())
        {
            fileName = fileName.Replace(c, '_');
        }

        return fileName.Trim();
    }

    // Extract numbers from string.
    // Returns every run of digits that fits in an int, in order of appearance.
    // Runs that do not parse as an int (for example, values too large) are skipped.
    public static List<int> ExtractNumbers(this string text)
    {
        var numbers = new List<int>();
        var matches = Regex.Matches(text, @"\d+");

        foreach (Match match in matches)
        {
            if (int.TryParse(match.Value, out int number))
                numbers.Add(number);
        }

        return numbers;
    }

    // Levenshtein distance (string similarity).
    // Returns the minimum number of single-character insertions, deletions, and
    // substitutions needed to turn s1 into s2. Characters are compared as UTF-16
    // code units, case-sensitively.
    // Throws ArgumentNullException when either string is null.
    public static int LevenshteinDistance(this string s1, string s2)
    {
        if (s1 == null)
            throw new ArgumentNullException(nameof(s1));

        if (s2 == null)
            throw new ArgumentNullException(nameof(s2));

        // matrix[i, j] = distance between the first i chars of s1 and the first j chars of s2
        int[,] matrix = new int[s1.Length + 1, s2.Length + 1];

        // Reaching an empty prefix takes one deletion (or insertion) per character
        for (int i = 0; i <= s1.Length; i++)
            matrix[i, 0] = i;

        for (int j = 0; j <= s2.Length; j++)
            matrix[0, j] = j;

        for (int i = 1; i <= s1.Length; i++)
        {
            for (int j = 1; j <= s2.Length; j++)
            {
                int cost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
                matrix[i, j] = Math.Min(
                    Math.Min(matrix[i - 1, j] + 1, matrix[i, j - 1] + 1),  // deletion, insertion
                    matrix[i - 1, j - 1] + cost);                          // substitution or match
            }
        }

        return matrix[s1.Length, s2.Length];
    }
}

Performance Considerations

String vs StringBuilder

csharp
// String concatenation in loop (bad performance)
// Kept as the anti-pattern for comparison: every pass allocates a new, longer string
public string BuildStringBad(int count)
{
    string accumulated = "";
    int i = 0;

    while (i < count)
    {
        accumulated = accumulated + i.ToString() + " ";
        i++;
    }

    return accumulated;
}

// StringBuilder (good performance)
// Appends into one growable buffer and creates the final string only once
public string BuildStringGood(int count)
{
    var builder = new StringBuilder(count * 10);
    int i = 0;

    while (i < count)
    {
        builder.Append(i);
        builder.Append(" ");
        i++;
    }

    return builder.ToString();
}

Memory Efficiency

csharp
// Use ReadOnlySpan for zero-allocation operations
// Case-insensitive (ordinal) substring check performed directly on the spans
public bool ContainsWord(ReadOnlySpan<char> text, ReadOnlySpan<char> word)
    => text.Contains(word, StringComparison.OrdinalIgnoreCase);

// Use string pooling for frequently used strings
public class StringPool
{
    // Maps each distinct string value to the first instance that was pooled for it
    private static readonly ConcurrentDictionary<string, string> _pool =
        new ConcurrentDictionary<string, string>();

    // Returns the pooled instance equal to value, pooling value itself when none exists yet
    public static string GetInterned(string value) => _pool.GetOrAdd(value, value);
}

Best Practices

String Comparison

csharp
// Use StringComparison for consistent comparison
string name1 = "Alice";
string name2 = "alice";

// Good: Specify comparison type
bool isEqual = name1.Equals(name2, StringComparison.OrdinalIgnoreCase);  // True

// Bad: Intent is not stated. Equals(string) is an ordinal, case-sensitive comparison
// (it does not depend on culture), so names that differ only by case do not match
bool isBad = name1.Equals(name2);  // False

Null and Empty Handling

csharp
// Safe string operations
// Extension methods must be declared inside a non-generic static class, so the
// helpers are grouped in one here. Each of them may be called on a null reference.
public static class NullSafeStringExtensions
{
    // Trims surrounding whitespace; returns an empty string instead of failing when text is null.
    public static string SafeTrim(this string text)
    {
        return text?.Trim() ?? string.Empty;
    }

    // Instance-style shortcut for string.IsNullOrEmpty.
    public static bool IsNullOrEmpty(this string text)
    {
        return string.IsNullOrEmpty(text);
    }

    // Instance-style shortcut for string.IsNullOrWhiteSpace.
    public static bool IsNullOrWhiteSpace(this string text)
    {
        return string.IsNullOrWhiteSpace(text);
    }
}

Summary

In this chapter, you learned:

  • String characteristics and creation methods
  • Basic string operations: concatenation, comparison, search, split, join
  • String formatting with String.Format and interpolation
  • StringBuilder for efficient string building
  • Regular expressions for pattern matching
  • Custom extension methods
  • Advanced features: raw strings, string spans
  • Performance considerations and best practices

String processing is fundamental for text manipulation, data validation, and user interface development. Mastering these techniques will make you more effective in handling textual data in C# applications. In the next chapter, we'll explore object-oriented programming with classes and objects.

Content is for learning and research only.