C# String Processing
This chapter covers string processing in C#: creating strings, common operations, formatting, StringBuilder, regular expressions, and more, so that you can handle text manipulation with confidence.
String Basics
String Characteristics
csharp
// Strings are immutable
string str1 = "Hello";
string str2 = str1;
str1 += " World"; // Creates new string, str1 points to new object
Console.WriteLine($"str1: {str1}"); // "Hello World"
Console.WriteLine($"str2: {str2}"); // "Hello" (unchanged)
// Strings are reference types
string a = "test";
string b = "test";
Console.WriteLine($"a == b: {a == b}"); // True (value equality)
Console.WriteLine($"ReferenceEquals(a, b): {ReferenceEquals(a, b)}"); // True (string interning)
// String length
string message = "Hello, C#!";
Console.WriteLine($"String length: {message.Length}");
// Access characters
Console.WriteLine($"First character: {message[0]}"); // 'H'
Console.WriteLine($"Last character: {message[message.Length - 1]}"); // '!'
String Creation Methods
csharp
// 1. String literals
string literal = "Hello World";
// 2. Verbatim strings
string path = @"C:\Users\Name\Documents\file.txt";
string multiline = @"This is a
multiline string
example";
// 3. String interpolation
string name = "Alice";
int age = 25;
string interpolated = $"Name: {name}, Age: {age}";
// 4. Create from character array
char[] chars = { 'H', 'e', 'l', 'l', 'o' };
string fromChars = new string(chars);
// 5. Create from repeated character
string repeated = new string('A', 5); // "AAAAA"
// 6. Empty and whitespace strings
string empty = string.Empty;
string whitespace = " ";
String Operations
Basic Operations
csharp
string text = "Hello World";
// Concatenation
string greeting = "Hello" + " " + "World";
string concat2 = string.Concat("Hello", " ", "World");
// Comparison
bool isEqual = text == "Hello World";
bool equals = text.Equals("Hello World");
bool equalsIgnoreCase = text.Equals("hello world", StringComparison.OrdinalIgnoreCase);
// Substring
string sub = text.Substring(6, 5); // "World"
// Trim
string padded = " Hello World ";
string trimmed = padded.Trim(); // "Hello World"
string trimStart = padded.TrimStart(); // "Hello World "
string trimEnd = padded.TrimEnd(); // " Hello World"
// Replace
string replaced = text.Replace("World", "C#"); // "Hello C#"
// Upper and lower case
string upper = text.ToUpper(); // "HELLO WORLD"
string lower = text.ToLower(); // "hello world"
Search Operations
csharp
string text = "The quick brown fox jumps over the lazy dog";
// Contains
bool containsFox = text.Contains("fox"); // True
bool containsCat = text.Contains("cat"); // False
// StartsWith and EndsWith
bool startsWithThe = text.StartsWith("The"); // True
bool endsWithDog = text.EndsWith("dog"); // True
// IndexOf
int foxIndex = text.IndexOf("fox"); // 16
int dogIndex = text.LastIndexOf("dog"); // 40
// IndexOf with start position
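int secondThe = text.IndexOf("The", 4); // -1: the search is case-sensitive, so the lowercase "the" at index 31 is not found (pass StringComparison.OrdinalIgnoreCase to match it)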
// Any (LINQ)
bool hasVowel = text.Any(c => "AEIOUaeiou".Contains(c));
Split and Join
csharp
// Split string
string csv = "Apple,Banana,Orange,Grape";
string[] fruits = csv.Split(','); // ["Apple", "Banana", "Orange", "Grape"]
// Split with options
string data = "Apple,,Banana,,Orange";
string[] parts = data.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
// Split by multiple delimiters
string sentence = "Hello World, how are you?";
string[] words = sentence.Split(new[] { ' ', ',', '?' }, StringSplitOptions.RemoveEmptyEntries);
// Join strings
string[] items = { "Apple", "Banana", "Orange" };
string joined = string.Join(", ", items); // "Apple, Banana, Orange"
// Join with separator
string numbers = string.Join("-", 1, 2, 3, 4, 5); // "1-2-3-4-5"
String Formatting
String.Format
csharp
// Basic formatting
string formatted = string.Format("Hello {0}, you are {1} years old", "Alice", 25);
// Number formatting
string number = string.Format("Price: {0:C}", 123.45); // "Price: $123.45"
string percent = string.Format("Progress: {0:P}", 0.75); // "Progress: 75.00%"
// Date formatting
DateTime now = DateTime.Now;
string date = string.Format("Today is {0:yyyy-MM-dd}", now);
// Custom formatting
string custom = string.Format("ID: {0:D5}", 42); // "ID: 00042"
Composite Formatting
csharp
// Multiple placeholders
string message = string.Format(
"User {0} ({1}) has {2} messages. Last login: {3:yyyy-MM-dd HH:mm}",
"Alice", "alice@example.com", 5, DateTime.Now
);
Format Providers
csharp
// Culture-specific formatting
CultureInfo usCulture = new CultureInfo("en-US");
CultureInfo frenchCulture = new CultureInfo("fr-FR");
decimal amount = 1234.56m;
string usPrice = amount.ToString("C", usCulture); // "$1,234.56"
string frenchPrice = amount.ToString("C", frenchCulture); // "1 234,56 €"
// Custom format provider
NumberFormatInfo customFormat = new NumberFormatInfo
{
CurrencySymbol = "€",
CurrencyDecimalDigits = 2
};
string customPrice = amount.ToString("C", customFormat);
StringBuilder
Basic StringBuilder Usage
csharp
// Create StringBuilder
StringBuilder sb = new StringBuilder();
// Append strings
sb.Append("Hello");
sb.Append(" ");
sb.Append("World");
// Append line
sb.AppendLine("This is a new line");
// Append format
sb.AppendFormat("The answer is {0}", 42);
// Get final string
string result = sb.ToString();
StringBuilder Performance
csharp
// Efficient string building
// Produces one "Line {i}: Some content here" line for every i from 0 up to (but not including)
// iterations, accumulating them in a single StringBuilder and returning the combined text.
public string BuildLargeString(int iterations)
{
    // Initial capacity hint of ten characters per requested line.
    var buffer = new StringBuilder(iterations * 10);

    int lineNumber = 0;
    while (lineNumber < iterations)
    {
        buffer.AppendLine($"Line {lineNumber}: Some content here");
        lineNumber++;
    }

    return buffer.ToString();
}
// StringBuilder with capacity
StringBuilder sb2 = new StringBuilder(1000); // Pre-allocate capacity
Regular Expressions
Basic Regex Patterns
csharp
using System.Text.RegularExpressions;
// Simple pattern matching
string text = "The price is $123.45 for item ABC123";
Regex priceRegex = new Regex(@"\$\d+\.\d{2}");
Match priceMatch = priceRegex.Match(text);
if (priceMatch.Success)
{
Console.WriteLine($"Price found: {priceMatch.Value}");
}
// Email validation
string email = "user@example.com";
Regex emailRegex = new Regex(@"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$");
bool isValidEmail = emailRegex.IsMatch(email);
// Phone number extraction
string phoneNumber = "Call me at (555) 123-4567";
Regex phoneRegex = new Regex(@"\(?(\d{3})\)?[-\s]?(\d{3})[-\s]?(\d{4})");
Match phoneMatch = phoneRegex.Match(phoneNumber);
if (phoneMatch.Success)
{
Console.WriteLine($"Area: {phoneMatch.Groups[1].Value}");
Console.WriteLine($"Exchange: {phoneMatch.Groups[2].Value}");
Console.WriteLine($"Number: {phoneMatch.Groups[3].Value}");
}
Regex Groups and Replacement
csharp
// Named groups
string logEntry = "2023-12-25 10:30:45 [ERROR] Database connection failed";
Regex logRegex = new Regex(@"(?<date>\d{4}-\d{2}-\d{2})\s+(?<time>\d{2}:\d{2}:\d{2})\s+\[(?<level>\w+)\]\s+(?<message>.+)");
Match logMatch = logRegex.Match(logEntry);
if (logMatch.Success)
{
Console.WriteLine($"Date: {logMatch.Groups["date"].Value}");
Console.WriteLine($"Time: {logMatch.Groups["time"].Value}");
Console.WriteLine($"Level: {logMatch.Groups["level"].Value}");
Console.WriteLine($"Message: {logMatch.Groups["message"].Value}");
}
// Replacement
string input = "User123 logged in at 2023-12-25";
string output = Regex.Replace(input, @"\d+", "***");
Console.WriteLine(output); // "User*** logged in at ***-***-***" (every run of digits becomes "***")
String Extensions
Custom Extension Methods
csharp
public static class StringExtensions
{
// Truncate with ellipsis.
// Returns text unchanged when it is null, empty, or at most maxLength characters long.
// Otherwise returns exactly maxLength characters: the leading part of text followed by "...".
// When maxLength is 0, 1 or 2 there is no room for the ellipsis, so the text is simply cut
// at maxLength characters.
// Throws ArgumentOutOfRangeException when text has to be shortened and maxLength is negative.
public static string Truncate(this string text, int maxLength)
{
    const string Ellipsis = "...";

    if (string.IsNullOrEmpty(text) || text.Length <= maxLength)
    {
        return text;
    }

    if (maxLength < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(maxLength), maxLength, "maxLength must not be negative.");
    }

    // Subtracting the ellipsis length here would give Substring a negative length.
    if (maxLength < Ellipsis.Length)
    {
        return text.Substring(0, maxLength);
    }

    return text.Substring(0, maxLength - Ellipsis.Length) + Ellipsis;
}
// Capitalize words: lower-cases the whole text and then title-cases it,
// both according to the current culture. Null or empty input is returned as-is.
public static string TitleCase(this string text)
{
    if (!string.IsNullOrEmpty(text))
    {
        TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
        string lowered = casing.ToLower(text);
        return casing.ToTitleCase(lowered);
    }

    return text;
}
// Remove HTML tags: deletes every "<...>" span and keeps the text found between the tags.
// Null or empty input is returned as-is.
public static string StripHtml(this string html)
{
    const string TagPattern = @"<[^>]*>";

    return string.IsNullOrEmpty(html)
        ? html
        : Regex.Replace(html, TagPattern, string.Empty);
}
// Is numeric: reports whether text can be parsed as a decimal number.
// provider selects the culture whose decimal and group separators are accepted; when it is
// omitted (null) the current culture is used. Pass CultureInfo.InvariantCulture for
// machine-readable data such as "123.45" so the answer does not vary with regional settings.
// Returns false for null, empty or non-numeric text.
public static bool IsNumeric(this string text, IFormatProvider provider = null)
{
    return decimal.TryParse(text, NumberStyles.Number, provider, out _);
}
// Count words: the number of non-empty pieces obtained by splitting the text on
// space, tab, line feed and carriage return. Null, empty or whitespace-only text counts as 0.
public static int WordCount(this string text)
{
    if (!string.IsNullOrWhiteSpace(text))
    {
        char[] separators = { ' ', '\t', '\n', '\r' };
        string[] tokens = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
        return tokens.Length;
    }

    return 0;
}
}
Using Extensions
csharp
// Usage of extension methods
string longText = "This is a very long text that needs to be truncated";
string truncated = longText.Truncate(20); // "This is a very lo..."
string title = "the quick brown fox";
string titleCase = title.TitleCase(); // "The Quick Brown Fox"
string html = "<p>This is <b>bold</b> text</p>";
string cleanText = html.StripHtml(); // "This is bold text"
string number = "123.45";
bool isNumber = number.IsNumeric(); // True
string sentence = "Hello world, how are you today?";
int wordCount = sentence.WordCount(); // 6
Advanced String Processing
String Interpolation Advanced
csharp
// Format strings in interpolation
string name = "Alice";
decimal balance = 1234.56m;
DateTime date = DateTime.Now;
string formatted = $"{name,-10} | {balance,15:C} | {date:yyyy-MM-dd}";
// e.g. "Alice      |       $1,234.56 | 2023-12-25" (name left-aligned in 10 columns, balance right-aligned in 15)
// Conditional interpolation
int score = 85;
string grade = score >= 90 ? "A" :
score >= 80 ? "B" :
score >= 70 ? "C" :
score >= 60 ? "D" : "F";
string result = $"Score: {score}, Grade: {grade}";
Raw String Literals (C# 11.0+)
csharp
// Raw string literals
string json1 = """{"name": "Alice", "age": 25}""";
string json2 = """
{
"name": "Alice",
"age": 25,
"active": true
}
""";
string filePath = """C:\Users\Documents\file.txt""";
String Span (C# 7.2+)
csharp
// ReadOnlySpan for efficient processing
ReadOnlySpan<char> text = "Hello World".AsSpan();
// Efficient substring without allocation
ReadOnlySpan<char> hello = text.Slice(0, 5);
// Efficient character checking
bool hasSpace = text.Contains(' ');
// Pattern matching with spans
ReadOnlySpan<char> pattern = "World".AsSpan();
bool endsWithWorld = text.EndsWith(pattern);
Practical Examples
Text File Processor
csharp
public class TextFileProcessor
{
// Reads the file at filePath and returns its processed content lines in file order.
// Blank lines and lines whose first non-whitespace character is '#' are skipped; every
// other line is trimmed and passed through ProcessLine, and non-empty results are collected.
// Exceptions raised while opening or reading the file are not caught here.
public static List<string> ProcessFile(string filePath)
{
    var processedLines = new List<string>();

    // File.ReadLines streams the file instead of loading every line into an array first.
    foreach (string line in File.ReadLines(filePath))
    {
        // Trim once and reuse the result for both the filter and the processing step.
        string trimmed = line.Trim();

        // Remove comments and empty lines. '#' is a marker, not linguistic text, so compare ordinally.
        if (trimmed.Length == 0 || trimmed.StartsWith("#", StringComparison.Ordinal))
        {
            continue;
        }

        // Process line
        string processed = ProcessLine(trimmed);
        if (!string.IsNullOrEmpty(processed))
        {
            processedLines.Add(processed);
        }
    }

    return processedLines;
}
// Normalises a single line: each run of whitespace is collapsed into one space,
// then the text is lower-cased and title-cased using the current culture.
private static string ProcessLine(string line)
{
    // Remove extra whitespace
    string collapsed = Regex.Replace(line, @"\s+", " ");

    // Convert to title case
    TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
    return casing.ToTitleCase(collapsed.ToLower());
}
}
Log Parser
csharp
public class LogParser
{
// One parsed log record. ParseLogs creates an instance for every regex match it finds.
public class LogEntry
{
// Date and time of the entry, parsed from the "timestamp" group (date followed by time).
public DateTime Timestamp { get; set; }
// Text found between the square brackets of the line, e.g. "ERROR" for "[ERROR]".
public string Level { get; set; }
// Remainder of the line that follows "source:" and its trailing whitespace.
public string Message { get; set; }
// Word captured between the bracketed level and the colon (the "source" group).
public string Source { get; set; }
}
// Extracts every entry shaped like "yyyy-MM-dd HH:mm:ss [LEVEL] source: message" from logContent
// and returns them in the order they appear. Text that does not match the pattern is ignored.
// Throws FormatException when a matched timestamp has the right shape but is not a valid
// date/time (for example month 13).
public static List<LogEntry> ParseLogs(string logContent)
{
    var entries = new List<LogEntry>();

    // The message group stops at '\r' as well as '\n' ('.' only stops at '\n'), so input with
    // Windows line endings does not leave a trailing carriage return in Message.
    var logRegex = new Regex(@"(?<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s+\[(?<level>\w+)\]\s+(?<source>\w+):\s+(?<message>[^\r\n]+)");

    foreach (Match match in logRegex.Matches(logContent))
    {
        entries.Add(new LogEntry
        {
            // The timestamp layout is fixed by the regex, so parse it independently of the
            // machine's regional settings.
            Timestamp = DateTime.Parse(match.Groups["timestamp"].Value, CultureInfo.InvariantCulture),
            Level = match.Groups["level"].Value,
            Source = match.Groups["source"].Value,
            Message = match.Groups["message"].Value
        });
    }

    return entries;
}
// Returns a new list holding the entries whose Level equals level, compared ordinally
// and ignoring case. The input list is left untouched and the original order is kept.
public static List<LogEntry> FilterByLevel(List<LogEntry> entries, string level)
{
    IEnumerable<LogEntry> matching =
        from entry in entries
        where entry.Level.Equals(level, StringComparison.OrdinalIgnoreCase)
        select entry;

    return matching.ToList();
}
}
String Utilities Library
csharp
public static class StringUtils
{
// Generate random string of the requested length, picking each character at random from allowedChars.
// Not suitable for passwords, tokens or other secrets: System.Random is not a cryptographic generator.
// Throws ArgumentOutOfRangeException when length is negative and ArgumentException when
// allowedChars is null or empty.
public static string RandomString(int length, string allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
{
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), length, "length must not be negative.");
    }

    if (string.IsNullOrEmpty(allowedChars))
    {
        throw new ArgumentException("allowedChars must contain at least one character.", nameof(allowedChars));
    }

    // Random.Shared is a thread-safe, process-wide generator. Creating a new Random per call
    // is wasteful and, on runtimes that seed from the clock, can produce identical strings
    // for calls made in quick succession.
    Random random = Random.Shared;

    char[] result = new char[length];
    for (int i = 0; i < length; i++)
    {
        result[i] = allowedChars[random.Next(allowedChars.Length)];
    }

    return new string(result);
}
// Convert to safe filename: every character listed by Path.GetInvalidFileNameChars()
// is replaced with '_', then leading and trailing whitespace is trimmed from the result.
public static string ToSafeFileName(this string fileName)
{
    char[] invalid = Path.GetInvalidFileNameChars();

    // Splitting on the invalid characters and re-joining with "_" swaps each one for an underscore.
    string[] pieces = fileName.Split(invalid);
    return string.Join("_", pieces).Trim();
}
// Extract numbers from string: finds each run of digits in text, in order of appearance,
// and returns those that parse as int. Runs that do not parse (for example because the
// value is too large for an int) are left out.
public static List<int> ExtractNumbers(this string text)
{
    MatchCollection digitRuns = Regex.Matches(text, @"\d+");
    var found = new List<int>();

    for (int index = 0; index < digitRuns.Count; index++)
    {
        bool parsedOk = int.TryParse(digitRuns[index].Value, out int value);
        if (parsedOk)
        {
            found.Add(value);
        }
    }

    return found;
}
// Levenshtein distance (string similarity): the minimum number of single-character
// insertions, deletions and substitutions needed to turn s1 into s2.
// Fills a (s1.Length + 1) x (s2.Length + 1) table where cell [r, c] holds the distance
// between the first r characters of s1 and the first c characters of s2.
public static int LevenshteinDistance(this string s1, string s2)
{
    int rows = s1.Length;
    int cols = s2.Length;
    var dist = new int[rows + 1, cols + 1];

    // Turning a prefix into the empty string (or the reverse) costs one edit per character.
    for (int r = 0; r <= rows; r++)
    {
        dist[r, 0] = r;
    }

    for (int c = 0; c <= cols; c++)
    {
        dist[0, c] = c;
    }

    for (int r = 1; r <= rows; r++)
    {
        char fromChar = s1[r - 1];

        for (int c = 1; c <= cols; c++)
        {
            int deletion = dist[r - 1, c] + 1;
            int insertion = dist[r, c - 1] + 1;
            int substitution = dist[r - 1, c - 1] + (fromChar == s2[c - 1] ? 0 : 1);

            int best = deletion < insertion ? deletion : insertion;
            if (substitution < best)
            {
                best = substitution;
            }

            dist[r, c] = best;
        }
    }

    return dist[rows, cols];
}
}
Performance Considerations
String vs StringBuilder
csharp
// String concatenation in loop (bad performance)
// Deliberately inefficient example: strings are immutable, so every pass through the loop
// builds a brand-new, longer string. Returns the numbers 0..count-1, each followed by a space.
public string BuildStringBad(int count)
{
    string result = string.Empty;

    int i = 0;
    while (i < count)
    {
        string piece = i.ToString() + " ";
        result = result + piece; // Creates new string each iteration
        i++;
    }

    return result;
}
// StringBuilder (good performance)
// Appends into a single StringBuilder created with a capacity hint of ten characters per
// item. Returns the numbers 0..count-1, each followed by a space.
public string BuildStringGood(int count)
{
    var buffer = new StringBuilder(count * 10);

    int i = 0;
    while (i < count)
    {
        buffer.Append(i);
        buffer.Append(" ");
        i++;
    }

    return buffer.ToString();
}
Memory Efficiency
csharp
// Use ReadOnlySpan for zero-allocation operations
// Reports whether word occurs anywhere inside text, compared ordinally and ignoring case.
// This is a substring test: "cat" is found inside "concatenate".
public bool ContainsWord(ReadOnlySpan<char> text, ReadOnlySpan<char> word)
{
    int position = text.IndexOf(word, StringComparison.OrdinalIgnoreCase);
    return position >= 0;
}
// Use string pooling for frequently used strings
public class StringPool
{
private static readonly ConcurrentDictionary<string, string> _pool = new();
// Returns the pooled string equal to value, first adding value to the pool when no equal
// string has been stored yet.
public static string GetInterned(string value) => _pool.GetOrAdd(value, value);
}
Best Practices
String Comparison
csharp
// Use StringComparison for consistent comparison
string name1 = "Alice";
string name2 = "alice";
// Good: Specify comparison type
bool isEqual = name1.Equals(name2, StringComparison.OrdinalIgnoreCase);
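// Unclear: Equals(string) is an ordinal, case-sensitive comparison (it is not culture-dependent),
// so this is simply false here. State the intended StringComparison explicitly instead.
bool isBad = name1.Equals(name2); // False: "Alice" and "alice" differ in case
Null and Empty Handling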
csharp
// Safe string operations
// Trims leading and trailing whitespace; a null input yields an empty string instead of throwing.
public static string SafeTrim(this string text)
{
    if (text is null)
    {
        return string.Empty;
    }

    return text.Trim();
}
// Extension-method form of the null-or-empty check; it can be called on a null reference.
// Returns true when text is null or has no characters.
public static bool IsNullOrEmpty(this string text) => text is null || text.Length == 0;
// Extension-method form of string.IsNullOrWhiteSpace; it can be called on a null reference.
// Returns true when text is null, empty, or consists only of whitespace characters.
public static bool IsNullOrWhiteSpace(this string text)
    => string.IsNullOrWhiteSpace(text);
Summary
In this chapter, you learned:
- String characteristics and creation methods
- Basic string operations: concatenation, comparison, search, split, join
- String formatting with String.Format and interpolation
- StringBuilder for efficient string building
- Regular expressions for pattern matching
- Custom extension methods
- Advanced features: raw strings, string spans
- Performance considerations and best practices
String processing is fundamental for text manipulation, data validation, and user interface development. Mastering these techniques will make you more effective in handling textual data in C# applications. In the next chapter, we'll explore object-oriented programming with classes and objects.