Using SimilaritySearch with ManifoldLearning

by: Eric S. Téllez

This demonstration is about using SimilaritySearch and ManifoldLearning methods through SimSearchManifoldLearning.

using SimilaritySearch, SimSearchManifoldLearning, ManifoldLearning, Primes, PlotlyLight, StatsBase, LinearAlgebra, Markdown, Random

SCurve example


# Sample the S-curve dataset: `X` holds the coordinates (one point per column,
# rows 1..3 are plotted as x/y/z) and `L` holds the per-point values used for coloring.
X, L = ManifoldLearning.scurve(; segments = 5)

# Single 3D scatter trace; the marker is built in a local scope so that no
# extra global names are introduced.
data = let
    marker = (
        color = L,
        colorscale = "Viridis",  # other Plotly colorscales: "Default", "Portland", "Jet", etc.
        size = 3,
        opacity = 0.5,
        line = (width = 0,),
    )

    [
        Config(;
            x = X[1, :],
            y = X[2, :],
            z = X[3, :],
            type = "scatter3d",
            mode = "markers",
            marker,
        ),
    ]
end

# Square canvas with a labeled 3D scene.
layout = let
    scene = Config(;
        xaxis = (title = "X",),
        yaxis = (title = "Y",),
        zaxis = (title = "Z",),
        aspectmode = "cube",  # keeps equal proportions on all 3 axes
    )

    Config(; width = 800, height = 800, scene)
end

Plot(data, layout)

SimilaritySearch supports both exact and approximate algorithms for solving k-nearest-neighbor queries, and it also supports different metrics. For instance, let's see how the choice of distance function modifies the projection.

Manhattan distance (\(L_1\))

# 2D Isomap projection of `X` using the `ApproxManhattan` neighbor type,
# drawn as a scatter plot colored by `L`.
let
    embedding = predict(fit(Isomap, X; nntype = ApproxManhattan))

    marker = (
        color = L,
        colorscale = "Viridis",  # other Plotly colorscales: "Default", "Portland", "Jet", etc.
        size = 3,
        opacity = 0.5,
        line = (width = 0,),
    )

    trace = Config(;
        x = embedding[1, :],
        y = embedding[2, :],
        type = "scatter",
        mode = "markers",
        marker,
    )

    # The plot is the value of the block, so the notebook displays it.
    Plot([trace])
end
LOG add! sp=1 ep=33 n=0 BeamSearch(bsize=4, Δ=1.0, maxvisits=1000000) mem=1GB max-rss=2GB 2026-04-02T20:42:53.385
LOG n.size quantiles:[0.0, 2.0, 3.0, 3.0, 5.0]
  0.007393 seconds (3.00 k allocations: 62.656 KiB)

Euclidean distance (\(L_2\))

# 2D Isomap projection of `X` using the `ApproxEuclidean` neighbor type,
# drawn as a scatter plot colored by `L`.
let
    projection = predict(fit(Isomap, X; nntype = ApproxEuclidean))
    xs = projection[1, :]
    ys = projection[2, :]

    points = Config(;
        x = xs,
        y = ys,
        type = "scatter",
        mode = "markers",
        marker = (
            color = L,
            colorscale = "Viridis",  # other Plotly colorscales: "Default", "Portland", "Jet", etc.
            size = 3,
            opacity = 0.5,
            line = (width = 0,),
        ),
    )

    # The plot is the value of the block, so the notebook displays it.
    Plot([points])
end
LOG add! sp=1 ep=33 n=0 BeamSearch(bsize=4, Δ=1.0, maxvisits=1000000) mem=1GB max-rss=2GB 2026-04-02T20:42:55.293
LOG n.size quantiles:[0.0, 2.0, 2.0, 3.0, 3.0]
  0.005777 seconds (3.00 k allocations: 62.656 KiB)

Chebyshev distance (\(L_\infty\))

# 2D Isomap projection of `X` using the `ApproxChebyshev` neighbor type,
# drawn as a scatter plot colored by `L`.
let
    Ch = predict(fit(Isomap, X; nntype = ApproxChebyshev))

    data = [
        Config(;
            x = Ch[1, :],
            y = Ch[2, :],
            type = "scatter",
            mode = "markers",
            marker = (
                color = L,
                colorscale = "Viridis",  # other Plotly colorscales: "Default", "Portland", "Jet", etc.
                size = 3,
                opacity = 0.5,
                line = (width = 0,),
            ),
        ),
    ]

    # Return the plot as the value of the block. Without this call the block
    # evaluates to the raw `Vector{Config}` and the notebook prints the trace
    # data instead of rendering a figure (unlike the Manhattan/Euclidean cells).
    Plot(data)
end
LOG add! sp=1 ep=33 n=0 BeamSearch(bsize=4, Δ=1.0, maxvisits=1000000) mem=1GB max-rss=2GB 2026-04-02T20:42:57.138
LOG n.size quantiles:[0.0, 2.0, 2.0, 3.0, 4.0]
  0.005261 seconds (3.00 k allocations: 62.656 KiB)
1-element Vector{Config}:
 Config(:x => [2.0061765603865735, 2.0750681957332207, -0.15903429338648212, -2.2067957775530846, 3.4633238253721736, -2.3555769223120944, -1.595226636913499, -3.0738023824468974, 1.6634046934698812, 0.24062112742915723  …  -0.46713517067199756, 0.2185946555371381, -2.435917651540759, -4.046765348465544, 0.6471996850188392, 3.5541871662188984, -3.8985843310011306, -0.6222333755363453, -1.7572166319664215, 2.0866038991190963], :y => [-0.237361916335264, -0.43435057869143306, 0.3095993094086523, -0.47647707306732756, -0.1889714208943654, -0.554703886892038, -0.15531131732443043, 0.1038979177141637, -0.3348005372518308, 0.43107187066553215  …  0.6922333356398562, -0.547958751643389, 0.5116955417619424, -0.1454906576979319, 0.646702400778618, -0.02541941567051652, -0.21488365410350166, -0.6526451355300763, 0.204122542496101, -0.19578769706421056], :type => "scatter", :mode => "markers", :marker => (color = [1, 1, 2, 3, 0, 3, 3, 3, 1, 2  …  2, 2, 3, 4, 2, 0, 4, 2, 3, 1], colorscale = "Viridis", size = 3, opacity = 0.5, line = (width = 0,)))

Visualizing prime gaps

The difference between consecutive prime numbers is called a prime gap. We use this series of values as a time series example because of its interesting behavior and because it can be computed without downloading anything beyond the necessary packages.

This example shows how to generate the dataset and index it. We will use ManifoldLearning to generate the 2D visualization.

Generation of the dataset

The time series is represented with windows of size w; we also take the logarithm of the gaps to reduce the variance in gap values. We create a matrix to avoid redefining the knn interface for ManifoldLearning.

"""
    create_database_primes_diff(n, w)

Build a matrix of sliding windows over the log2 of the differences between
consecutive entries of `primes(n)`.

The result is a `Matrix{Float32}` with `w` rows and `length(gaps) - w` columns,
where `gaps` is the log-difference series; column `j` holds `gaps[j:(j + w - 1)]`.
With this column count the last possible window (the one ending at the final
element of the series) is not included. The matrix size is logged with `@info`.
"""
function create_database_primes_diff(n, w)
    loggaps = log2.(diff(primes(n)))
    ncols = length(loggaps) - w
    windows = Matrix{Float32}(undef, w, ncols)
    @info size(windows)

    # Fill column by column; each column is a length-`w` window starting at `j`.
    for (j, col) in enumerate(eachcol(windows))
        col .= view(loggaps, j:(j + w - 1))
    end

    return windows
end

# Project the prime-gap windows to 2D and keep the two coordinate rows as `x` and `y`.
x, y = let
    windows = create_database_primes_diff(3 * 10^4, 5)

    # Isomap is used here; LLE is an alternative.
    model = fit(Isomap, windows; k = 16, maxoutdim = 2, nntype = ApproxEuclidean)
    embedding = predict(model)

    (embedding[1, :], embedding[2, :])
end

A 2D histogram

# 2D histogram trace over the projected coordinates `x` and `y`,
# using the same number of bins on both axes.
data = let nbins = 50
    [
        Config(;
            x,
            y,
            type = "histogram2d",
            colorscale = "Viridis",
            nbinsx = nbins,
            nbinsy = nbins,
            colorbar = (title = "freq",),
        ),
    ]
end

# Square canvas with a title and labeled axes.
layout = Config(;
    title = "Isomap proj. 2D of prime gaps",
    width = 600,
    height = 600,
    xaxis = (title = "X",),
    yaxis = (title = "Y",),
)

Plot(data, layout)

Environment and dependencies

Julia Version 1.10.11
Commit a2b11907d7b (2026-03-09 14:59 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (x86_64-apple-darwin24.0.0)
  CPU: 8 × Intel(R) Core(TM) i5-8257U CPU @ 1.40GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, skylake)
Threads: 8 default, 0 interactive, 4 GC (on 8 virtual cores)
Environment:
  JULIA_NUM_THREADS = auto
  JULIA_PROJECT = @.
  JULIA_LOAD_PATH = @:@stdlib
Status `~/Research/SimilaritySearchDemos/Project.toml`
  [aaaa29a8] Clustering v0.15.8
  [944b1d66] CodecZlib v0.7.8
  [a93c6f00] DataFrames v1.8.1
  [c5bfea45] Embeddings v0.4.6
  [f67ccb44] HDF5 v0.17.2
  [b20bd276] InvertedFiles v0.9.2
 [682c06a0] JSON v0.21.4
  [23fbe1c1] Latexify v0.16.10
  [eb30cadb] MLDatasets v0.7.21
  [06eb3307] ManifoldLearning v0.9.0
 [ca7969ec] PlotlyLight v0.11.0
  [91a5bcdd] Plots v1.41.6
  [27ebfcd6] Primes v0.5.7
  [ca7ab67e] SimSearchManifoldLearning v0.4.0
  [053f045d] SimilaritySearch v0.14.3
 [2913bbd2] StatsBase v0.33.21
  [f3b207a7] StatsPlots v0.15.8
  [7f6f6c8a] TextSearch v0.20.0
Info Packages marked with ⌃ and ⌅ have new versions available. Those with ⌃ may be upgradable, but those with ⌅ are restricted by compatibility constraints from upgrading. To see why use `status --outdated`