Skip to content

A documented way to synchronize Manifests of a notebook collection #3312

@mmikhasenko

Description

@mmikhasenko

Migrated from Slack (as an important project to do)

Problem

by @FelixWechsler

I have a few Pluto notebooks and I want to synchronize their Manifest to be all the same (reducing loading times).
Is there an automatic script or way to do that?

Solution from Claude
# Return the index range of the cell that starts with `marker` inside `content`:
# from the first character of the marker up to (but excluding) the newline that
# precedes the next `# ╔═╡` delimiter, or up to the end of `content` when no
# further delimiter follows. Returns `nothing` when `marker` does not occur.
function _pluto_cell_span(content::AbstractString, marker::AbstractString)
    found = findfirst(marker, content)
    isnothing(found) && return nothing
    following = findnext(r"\n# ╔═╡", content, nextind(content, last(found)))
    # `lastindex`/`prevind` (not `length` / `- 1`): the delimiters are multi-byte
    # characters, so character counts are not valid string indices.
    stop = isnothing(following) ? lastindex(content) : prevind(content, first(following))
    return first(found):stop
end

# Return `content` with the characters covered by `span` replaced by `replacement`.
function _pluto_splice(
    content::AbstractString, span::UnitRange{Int}, replacement::AbstractString
)
    head = content[1:prevind(content, first(span))]
    tail = content[nextind(content, last(span)):end]
    return head * replacement * tail
end

# Return `content` with `block` (followed by a blank line) inserted directly in
# front of the "# ╔═╡ Cell order:" section, or `nothing` when that section is missing.
function _pluto_insert_before_cell_order(content::AbstractString, block::AbstractString)
    found = findfirst("# ╔═╡ Cell order:", content)
    isnothing(found) && return nothing
    pos = first(found)
    return content[1:prevind(content, pos)] * block * "\n\n" * content[pos:end]
end

# Rebuild the "Cell order" section of `content`: the cells in `using_hashes` come
# first, then every other cell in its existing order, then the cells in
# `toml_hashes` that actually exist in `content`. Returns `content` unchanged when
# there is no "Cell order" section.
function _pluto_rebuild_cell_order(content::AbstractString, using_hashes, toml_hashes)
    found = findfirst("# ╔═╡ Cell order:", content)
    isnothing(found) && return content
    order_start = first(found)

    # Slice to `end`, not `length(content)`: the latter is a character count and
    # would cut the section short (or hit the middle of a multi-byte character).
    lines = split(content[order_start:end], '\n')
    header_line = lines[1]  # "# ╔═╡ Cell order:"

    # Keep only lines that reference a cell which is neither a using cell nor a
    # TOML cell; blank lines carry no hash and are dropped as well.
    other_cells = filter(lines[2:end]) do line
        found_hash = match(r"([a-f0-9\-]+)", line)
        isnothing(found_hash) && return false
        hash = found_hash.captures[1]
        return !(hash in using_hashes || hash in toml_hashes)
    end

    new_order = String[header_line]
    for hash in using_hashes
        push!(new_order, "# ╠═$(hash)")
    end
    append!(new_order, other_cells)
    for hash in toml_hashes
        # Do not reference a TOML cell that exists in neither notebook.
        occursin("# ╔═╡ $(hash)", content) || continue
        push!(new_order, "# ╟─$(hash)")
    end

    # End with a newline so the rewritten file keeps a final line terminator.
    return content[1:prevind(content, order_start)] * join(new_order, '\n') * "\n"
end

"""
    update_pluto_blocks_with_using(source_file, target_file)

Copy the package setup of the Pluto notebook `source_file` into the Pluto notebook
`target_file`, overwriting `target_file` in place.

Three things are transferred:

1. Every cell of `source_file` whose text contains `"using "`. A cell whose id already
   occurs in the target is replaced; otherwise it is inserted in front of the target's
   "Cell order" section.
2. The `PLUTO_PROJECT_TOML_CONTENTS` and `PLUTO_MANIFEST_TOML_CONTENTS` cells (fixed ids
   `00000000-0000-0000-0000-000000000001` and `...0002`). A block missing from the source
   is skipped with a warning.
3. The "Cell order" section of the target is rebuilt so that the copied `using` cells come
   first and the TOML cells come last.

Progress messages are printed to `stdout`. Returns `nothing`.

# Limitations
- The cell detection pattern does not match cells that contain a `#` character, so
  `using` cells with comments are not picked up.
- `"using "` is a plain substring test; it also matches inside strings or Markdown text.

# Throws
- `ErrorException` if a TOML block has to be inserted but the target has no
  "# ╔═╡ Cell order:" section.
"""
function update_pluto_blocks_with_using(
    source_file::AbstractString, target_file::AbstractString
)
    source_content = read(source_file, String)
    updated_content = read(target_file, String)

    # The embedded environment cells Pluto stores under fixed ids.
    blocks = [
        (hash = "00000000-0000-0000-0000-000000000001", name = "PLUTO_PROJECT_TOML_CONTENTS"),
        (hash = "00000000-0000-0000-0000-000000000002", name = "PLUTO_MANIFEST_TOML_CONTENTS"),
    ]

    # Collect all source cells that contain a 'using' statement.
    using_cells = NamedTuple{(:hash, :content),Tuple{String,String}}[]
    cell_pattern = r"# ╔═╡ ([a-f0-9\-]+)\n([^#]*?)(?=\n# ╔═╡|\z)"s
    for found in eachmatch(cell_pattern, source_content)
        cell_hash = String(found.captures[1])
        cell_content = found.captures[2]
        if occursin("using ", cell_content)
            push!(
                using_cells,
                (hash = cell_hash, content = "# ╔═╡ $cell_hash\n$cell_content"),
            )
            println("Found using statement in cell: $cell_hash")
        end
    end

    # Insert the using cells into the target, or replace them when already present.
    for cell in using_cells
        if !occursin(cell.hash, updated_content)
            inserted = _pluto_insert_before_cell_order(updated_content, cell.content)
            # Without a "Cell order" section there is no insertion point; skip quietly.
            if !isnothing(inserted)
                updated_content = inserted
                println("Inserted using cell: $(cell.hash)")
            end
        else
            span = _pluto_cell_span(updated_content, "# ╔═╡ $(cell.hash)")
            if !isnothing(span)
                updated_content = _pluto_splice(updated_content, span, cell.content)
                println("Updated existing using cell: $(cell.hash)")
            end
        end
    end

    # Transfer the Project/Manifest TOML cells.
    for block in blocks
        start_marker = "# ╔═╡ $(block.hash)"
        source_span = _pluto_cell_span(source_content, start_marker)
        if isnothing(source_span)
            println("Warning: Could not find $(block.name) block in source file")
            continue
        end
        source_block = source_content[source_span]

        target_span = _pluto_cell_span(updated_content, start_marker)
        if !isnothing(target_span)
            updated_content = _pluto_splice(updated_content, target_span, source_block)
            println("Replaced existing $(block.name) block")
        else
            inserted = _pluto_insert_before_cell_order(updated_content, source_block)
            isnothing(inserted) && error("Could not find Cell order section")
            updated_content = inserted
            println("Inserted new $(block.name) block")
        end
    end

    # Put the using cells at the top of the cell order and the TOML cells at the end.
    updated_content = _pluto_rebuild_cell_order(
        updated_content,
        [cell.hash for cell in using_cells],
        [block.hash for block in blocks],
    )

    write(target_file, updated_content)
    println("Successfully updated $target_file")
    return nothing
end

# Usage: copy the `using` cells and the embedded Project/Manifest TOML cells
# from notebook "a.jl" into notebook "b.jl" ("b.jl" is overwritten in place).
update_pluto_blocks_with_using("a.jl", "b.jl")

Related

  • some scripts

https://github.com/mmikhasenko/ReactionAmplitudeExamples.jl/blob/main/services/update_notebooks.jl
to be specific
https://github.com/mmikhasenko/ReactionAmplitudeExamples.jl/blob/main/services/update_notebooks.jl#L30C3-L59

I would really like a script that takes a collection of notebooks and:

  • Creates a Pkg environment that contains all packages imported in all notebooks
  • For each notebook, use a copy of that mega-env, with unused packages removed.
    I think this should minimize load times and caches can be reused 🙂

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions