Skip to content

Julia Dictionaries and Sets

Dictionaries (Dict) and Sets (Set) are important data structures in Julia: a Dict stores key-value pairs, and a Set stores unique elements.

Dictionaries (Dict)

Creating Dictionaries

julia
# Using Dict constructor
d = Dict("a" => 1, "b" => 2, "c" => 3)
println(d)

# Empty dictionary
empty_dict = Dict()
empty_typed = Dict{String, Int}()

# Create from arrays
keys_arr = ["a", "b", "c"]
vals_arr = [1, 2, 3]
d = Dict(zip(keys_arr, vals_arr))

# Using comprehension
d = Dict(x => x^2 for x in 1:5)
println(d)  # Dict(1=>1, 2=>4, 3=>9, 4=>16, 5=>25)

Accessing Elements

julia
d = Dict("apple" => 5, "banana" => 3, "cherry" => 8)

# Access by key
println(d["apple"])  # 5

# Using get (with default value)
println(get(d, "apple", 0))    # 5
println(get(d, "orange", 0))   # 0 (returns default when not exists)

# get! - get or insert default value
println(get!(d, "orange", 10)) # 10 (inserts and returns)
println(d["orange"])           # 10

# Check if key exists
println(haskey(d, "apple"))    # true
println("apple" in keys(d))    # true

Modifying Dictionaries

julia
d = Dict{String, Int}()

# Add/update elements
d["a"] = 1
d["b"] = 2
println(d)

# Batch add
merge!(d, Dict("c" => 3, "d" => 4))
println(d)

# Delete element
delete!(d, "a")
println(d)

# pop - delete and return value
val = pop!(d, "b")
println(val)  # 2

# pop with default value
val = pop!(d, "x", -1)
println(val)  # -1

# Clear dictionary
empty!(d)
println(isempty(d))  # true

Iterating Dictionaries

julia
d = Dict("a" => 1, "b" => 2, "c" => 3)

# Iterate key-value pairs
for (key, value) in d
    println("$key => $value")
end

# Iterate keys only
for key in keys(d)
    println(key)
end

# Iterate values only
for value in values(d)
    println(value)
end

# Using pairs
for pair in pairs(d)
    println(pair)  # "a" => 1 etc.
end

Dictionary Operations

julia
d1 = Dict("a" => 1, "b" => 2)
d2 = Dict("b" => 20, "c" => 3)

# Merge (creates new dictionary)
d3 = merge(d1, d2)
println(d3)  # Dict("a"=>1, "b"=>20, "c"=>3)

# Merge (in-place)
merge!(d1, d2)

# Merge with custom conflict handling
d1 = Dict("a" => 1, "b" => 2)
d2 = Dict("b" => 20, "c" => 3)
d3 = merge(+, d1, d2)  # Add values for same keys
println(d3)  # Dict("a"=>1, "b"=>22, "c"=>3)

# Filter
d = Dict("a" => 1, "b" => 2, "c" => 3, "d" => 4)
filtered = filter(p -> p.second > 2, d)
println(filtered)  # Dict("c"=>3, "d"=>4)

Dictionary Properties

julia
d = Dict("a" => 1, "b" => 2, "c" => 3)

# Length
println(length(d))  # 3

# Keys and values
println(keys(d))    # Collection of keys
println(values(d))  # Collection of values

# Convert to arrays
println(collect(keys(d)))
println(collect(values(d)))

# Key-value pairs array: a Vector of Pair objects (not tuples); Dict order is not guaranteed
println(collect(d))  # e.g. ["a" => 1, "b" => 2, "c" => 3], in some order

Ordered Dictionaries

The standard Dict does not guarantee iteration order. Use OrderedDict from the OrderedCollections package when insertion order must be preserved:

julia
using OrderedCollections

# Create ordered dictionary
od = OrderedDict("a" => 1, "b" => 2, "c" => 3)

# Maintains insertion order
od["d"] = 4
for (k, v) in od
    println("$k => $v")
end
# Outputs in insertion order

Default Dictionaries

julia
using DataStructures

# Create dictionary with default value
dd = DefaultDict{String, Int}(0)
dd["a"] += 1
dd["b"] += 1
dd["a"] += 1
println(dd["a"])  # 2
println(dd["c"])  # 0 (returns default for unset keys)

# Default value as empty array
dd = DefaultDict{String, Vector{Int}}(Vector{Int})
push!(dd["a"], 1)
push!(dd["a"], 2)
push!(dd["b"], 3)
println(dd)

Sets (Set)

Creating Sets

julia
# Using Set constructor
s = Set([1, 2, 3, 4, 5])
println(s)

# Empty set
empty_set = Set()
empty_typed = Set{Int}()

# Create from other collections
s = Set("hello")  # Character set
println(s)  # Set(['h', 'e', 'l', 'o'])

# Using comprehension
s = Set(x^2 for x in 1:5)
println(s)  # Set([1, 4, 9, 16, 25])

Set Operations

julia
s = Set([1, 2, 3])

# Add element
push!(s, 4)
println(s)  # Set([1, 2, 3, 4])

# Add multiple elements
union!(s, [5, 6])
println(s)

# Delete element
delete!(s, 1)
println(s)

# pop - delete and return arbitrary element
elem = pop!(s)
println("Deleted: $elem")

# Check membership
# (pop! above removes an arbitrary element, so the checks for 2 print false
# if 2 happened to be the element that was removed)
println(2 in s)   # true (unless pop! removed 2)
println(2 ∈ s)    # same check, Unicode operator (type \in<TAB>)
println(100 in s) # false
println(100 ∉ s)  # true (Unicode "not in", type \notin<TAB>)

Set Algebra (Union, Intersection, Difference)

julia
a = Set([1, 2, 3, 4])
b = Set([3, 4, 5, 6])

# Union
println(union(a, b))       # Set([1, 2, 3, 4, 5, 6])
println(a ∪ b)             # Same (Unicode, type \cup<TAB>)

# Intersection
println(intersect(a, b))   # Set([3, 4])
println(a ∩ b)             # Same (Unicode, type \cap<TAB>)

# Difference
println(setdiff(a, b))     # Set([1, 2])

# Symmetric difference (in a or b, but not both)
println(symdiff(a, b))     # Set([1, 2, 5, 6])

# Subset check
println(issubset([1, 2], a))  # true
println([1, 2] ⊆ a)           # true (Unicode, type \subseteq<TAB>)
println(a ⊇ [1, 2])           # true (superset, type \supseteq<TAB>)

# Equality
println(Set([1, 2]) == Set([2, 1]))  # true

In-place Set Operations

julia
a = Set([1, 2, 3, 4])
b = Set([3, 4, 5, 6])

# In-place union
union!(a, b)
println(a)  # Set([1, 2, 3, 4, 5, 6])

# In-place intersection
a = Set([1, 2, 3, 4])
intersect!(a, b)
println(a)  # Set([3, 4])

# In-place difference
a = Set([1, 2, 3, 4])
setdiff!(a, b)
println(a)  # Set([1, 2])

Set Properties

julia
s = Set([1, 2, 3, 4, 5])

# Length
println(length(s))  # 5

# Is empty
println(isempty(s))  # false

# Convert to array
arr = collect(s)
println(arr)

# Sorted array
sorted = sort(collect(s))
println(sorted)

Practical Examples

Word Frequency Count

julia
"""
    word_frequency(text)

Count how often each whitespace-separated word occurs in `text`.

The text is lowercased before splitting, so counting is case-insensitive.
Returns a `Dict{String, Int}` mapping each word to its number of occurrences.
"""
function word_frequency(text)
    tally = Dict{String, Int}()

    # split() with no delimiter breaks on runs of whitespace
    foreach(split(lowercase(text))) do token
        tally[token] = 1 + get(tally, token, 0)
    end

    return tally
end

text = "the quick brown fox jumps over the lazy dog the fox"
freq = word_frequency(text)

# Sort by frequency
sorted_freq = sort(collect(freq), by=x->x[2], rev=true)
for (word, count) in sorted_freq
    println("$word: $count")
end

Unique Ordered

julia
"""
    unique_ordered(arr)

Return a new vector holding the distinct elements of `arr`, in the order of
their first appearance.

The result has element type `eltype(arr)`; `arr` itself is not modified.
"""
function unique_ordered(arr)
    seen = Set{eltype(arr)}()
    result = eltype(arr)[]

    for x in arr
        # Keep an element only the first time it is encountered
        if x ∉ seen
            push!(seen, x)
            push!(result, x)
        end
    end

    return result
end

arr = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
println(unique_ordered(arr))  # [3, 1, 4, 5, 9, 2, 6]

Grouping Data

julia
"""
    group_by(arr, key_func)

Group the items of `arr` by the value `key_func(item)`.

Returns a `Dict{Any, Vector}` mapping each distinct key to a vector of the
items that produced it, in their original order. Each group vector has
element type `eltype(arr)` rather than `Any`.
"""
function group_by(arr, key_func)
    groups = Dict{Any, Vector}()

    for item in arr
        # get! fetches the existing bucket or inserts a new typed one in a single lookup
        bucket = get!(() -> eltype(arr)[], groups, key_func(item))
        push!(bucket, item)
    end

    return groups
end

# Group by even/odd
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
grouped = group_by(numbers, x -> x % 2 == 0 ? "even" : "odd")
println(grouped)

# Group by first letter
words = ["apple", "banana", "avocado", "blueberry", "cherry"]
grouped = group_by(words, w -> w[1])
println(grouped)

Bidirectional Mapping

julia
"""
    BiDict{K, V}

Two-way mapping between keys of type `K` and values of type `V`.

`forward` maps key => value and `backward` maps value => key. Assigning with
`bd[key] = value` keeps both dictionaries in sync.
"""
struct BiDict{K, V}
    forward::Dict{K, V}
    backward::Dict{V, K}
end

"""
    BiDict{K, V}()

Create an empty `BiDict` with key type `K` and value type `V`.
"""
function BiDict{K, V}() where {K, V}
    return BiDict{K, V}(Dict{K, V}(), Dict{V, K}())
end

"""
    setindex!(bd::BiDict, value, key)

Pair `key` with `value` in both directions (`bd[key] = value`).

Any previous pairing involving `key` or `value` is removed first, so the
mapping stays one-to-one and `forward`/`backward` never hold stale entries.
Returns `bd`.
"""
function Base.setindex!(bd::BiDict{K, V}, value, key) where {K, V}
    k = convert(K, key)
    v = convert(V, value)

    # If `k` already had a value, its reverse entry would otherwise be left behind
    haskey(bd.forward, k) && delete!(bd.backward, bd.forward[k])
    # If `v` already belonged to another key, that key's forward entry is now invalid
    haskey(bd.backward, v) && delete!(bd.forward, bd.backward[v])

    bd.forward[k] = v
    bd.backward[v] = k
    return bd
end

"""
    get_value(bd::BiDict, key)

Return the value paired with `key`. Throws a `KeyError` if `key` is absent.
"""
function get_value(bd::BiDict, key)
    return bd.forward[key]
end

"""
    get_key(bd::BiDict, value)

Return the key paired with `value`. Throws a `KeyError` if `value` is absent.
"""
function get_key(bd::BiDict, value)
    return bd.backward[value]
end

# Usage
bd = BiDict{String, Int}()
bd["one"] = 1
bd["two"] = 2

println(get_value(bd, "one"))  # 1
println(get_key(bd, 2))        # "two"

Caching/Memoization

julia
"""
    memoize(f)

Return a function that behaves like `f` but remembers its results.

Results are stored in a dictionary keyed by the tuple of positional
arguments, so `f` runs at most once per distinct argument tuple. The returned
function accepts positional arguments only.
"""
function memoize(f)
    results = Dict()

    return function memoized(args...)
        # Cache hit: reuse the stored result without calling `f`
        haskey(results, args) && return results[args]

        value = f(args...)
        results[args] = value
        return value
    end
end

# Usage
slow_fib(n) = n <= 2 ? 1 : slow_fib(n-1) + slow_fib(n-2)

fast_fib = memoize(function(n)
    n <= 2 ? 1 : fast_fib(n-1) + fast_fib(n-2)
end)

@time println(fast_fib(40))  # Fast

Performance Tips

julia
# 1. Specify types
d = Dict{String, Int}()  # More efficient than Dict()

# 2. Use sizehint! to preallocate
d = Dict{String, Int}()
sizehint!(d, 1000)  # Expecting 1000 elements

# 3. Avoid frequent string key creation
# Bad: interpolates a fresh key string inside the loop on every iteration
for i in 1:1000
    d["key_$i"] = i
end

# Good: build the key strings once up front, then reuse them in the loop
# NOTE(review): a single pass like this one still builds 1000 strings in total;
# the saving presumably only appears when keys_arr is reused across several passes.
keys_arr = ["key_$i" for i in 1:1000]
for i in 1:1000
    d[keys_arr[i]] = i
end

Next Steps

After learning dictionaries and sets, continue with:

Content is for learning and research only.