diff --git a/lib/rdf/canonicalization/canonicalization.ex b/lib/rdf/canonicalization/canonicalization.ex index 88b64d5c..8d61a20b 100644 --- a/lib/rdf/canonicalization/canonicalization.ex +++ b/lib/rdf/canonicalization/canonicalization.ex @@ -8,11 +8,17 @@ defmodule RDF.Canonicalization do end defp urdna2015(input) do - input - |> State.new() - |> create_canonical_identifiers_for_single_node_hashes() - |> create_canonical_identifiers_for_multiple_node_hashes() - |> apply_canonicalization(input) + {:ok, issuer_sv} = IdentifierIssuer.Supervisor.start_link() + + try do + input + |> State.new() + |> create_canonical_identifiers_for_single_node_hashes() + |> create_canonical_identifiers_for_multiple_node_hashes(issuer_sv) + |> apply_canonicalization(input) + after + DynamicSupervisor.stop(issuer_sv) + end end # 3) @@ -77,23 +83,21 @@ defmodule RDF.Canonicalization do defp do_create_canonical_identifiers_for_single_node_hashes(state, _, false), do: state # 6) - defp create_canonical_identifiers_for_multiple_node_hashes(state) do + defp create_canonical_identifiers_for_multiple_node_hashes(state, issuer_sv) do state.hash_to_bnodes |> Enum.sort() |> Enum.reduce(state, fn {_hash, identifier_list}, state -> # 6.1-2) Create a hash_path_list for all bnodes using a temporary identifier used to create canonical replacements identifier_list |> Enum.reduce([], fn identifier, hash_path_list -> - if IdentifierIssuer.issued?(state.canonical_issuer, identifier) do + if IdentifierIssuer.State.issued?(state.canonical_issuer, identifier) do hash_path_list else - {_issued_identifier, temporary_issuer} = - "_:b" - |> IdentifierIssuer.new() - |> IdentifierIssuer.issue_identifier(identifier) + temporary_issuer = IdentifierIssuer.Supervisor.new_issuer(issuer_sv, "_:b") + IdentifierIssuer.issue_identifier(temporary_issuer, identifier) [ - hash_n_degree_quads(state, identifier, temporary_issuer) + hash_n_degree_quads(state, identifier, temporary_issuer, issuer_sv) | hash_path_list ] end @@ -117,7 +121,7 @@ defmodule RDF.Canonicalization do Statement.map(statement, fn {_, %BlankNode{} = bnode} -> state.canonical_issuer - |> IdentifierIssuer.identifier(bnode) + |> IdentifierIssuer.State.identifier(bnode) |> BlankNode.new() {_, node} -> @@ -155,7 +159,7 @@ defmodule RDF.Canonicalization do # see https://www.w3.org/community/reports/credentials/CG-FINAL-rdf-dataset-canonicalization-20221009/#hash-related-blank-node defp hash_related_bnode(state, related, statement, issuer, position) do identifier = - IdentifierIssuer.identifier(state.canonical_issuer, related) || + IdentifierIssuer.State.identifier(state.canonical_issuer, related) || IdentifierIssuer.identifier(issuer, related) || hash_first_degree_quads(state, related) @@ -173,7 +177,7 @@ defmodule RDF.Canonicalization do end # see https://www.w3.org/community/reports/credentials/CG-FINAL-rdf-dataset-canonicalization-20221009/#hash-n-degree-quads - def hash_n_degree_quads(state, identifier, issuer) do + def hash_n_degree_quads(state, identifier, issuer, issuer_sv) do # IO.inspect(identifier, label: "ndeg: identifier") # 1-3) @@ -205,27 +209,25 @@ defmodule RDF.Canonicalization do |> Enum.reduce({chosen_path, chosen_issuer}, fn permutation, {chosen_path, chosen_issuer} -> # IO.inspect(permutation, label: "ndeg: perm") - issuer_copy = issuer + + issuer_copy = IdentifierIssuer.Supervisor.copy_issuer(issuer_sv, issuer) chosen_path_length = String.length(chosen_path) + # 5.4.4) - {path, recursion_list, issuer_copy} = - Enum.reduce_while(permutation, {"", [], issuer_copy}, fn - related, {path, recursion_list, issuer_copy} -> - {path, recursion_list, issuer_copy} = + {path, recursion_list} = + Enum.reduce_while(permutation, {"", []}, fn + related, {path, recursion_list} -> + {path, recursion_list} = if issued_identifier = - IdentifierIssuer.identifier(state.canonical_issuer, related) do - {path <> issued_identifier, recursion_list, issuer_copy} + IdentifierIssuer.State.identifier(state.canonical_issuer, related) do + {path <> issued_identifier, recursion_list} else if issued_identifier = IdentifierIssuer.identifier(issuer_copy, related) do - {path <> issued_identifier, recursion_list, issuer_copy} + {path <> issued_identifier, recursion_list} else - {issued_identifier, issuer_copy} = - IdentifierIssuer.issue_identifier(issuer_copy, related) - { - path <> issued_identifier, - [related | recursion_list], - issuer_copy + path <> IdentifierIssuer.issue_identifier(issuer_copy, related), + [related | recursion_list] } end end @@ -233,9 +235,9 @@ defmodule RDF.Canonicalization do if chosen_path_length != 0 and String.length(path) >= chosen_path_length and path > chosen_path do - {:halt, {path, recursion_list, issuer_copy}} + {:halt, {path, recursion_list}} else - {:cont, {path, recursion_list, issuer_copy}} + {:cont, {path, recursion_list}} end end) @@ -246,16 +248,14 @@ defmodule RDF.Canonicalization do recursion_list |> Enum.reverse() |> Enum.reduce_while({issuer_copy, path}, fn related, {issuer_copy, path} -> + # Note: The following steps are the only steps in the whole algorithm which really seem to rely on global state. {result_hash, result_issuer} = - hash_n_degree_quads(state, related, issuer_copy) - - # TODO: This step doesn't work without global state: - # issuing an identifier in the issuer copy which MIGHT be the result_issuer ... - # This causes some tests to fail, eg. test023 - {issued_identifier, issuer_copy} = - IdentifierIssuer.issue_identifier(issuer_copy, related) + hash_n_degree_quads(state, related, issuer_copy, issuer_sv) - path = path <> issued_identifier <> "<#{result_hash}>" + path = + path <> + IdentifierIssuer.issue_identifier(issuer_copy, related) <> + "<#{result_hash}>" if chosen_path_length != 0 and String.length(path) >= chosen_path_length and diff --git a/lib/rdf/canonicalization/identifier_issuer.ex b/lib/rdf/canonicalization/identifier_issuer.ex index 2b03198a..94a3552a 100644 --- a/lib/rdf/canonicalization/identifier_issuer.ex +++ b/lib/rdf/canonicalization/identifier_issuer.ex @@ -3,48 +3,46 @@ defmodule RDF.Canonicalization.IdentifierIssuer do An identifier issuer is used to issue new blank node identifier. """ - defstruct issued_identifiers: %{}, - issue_order: [], - identifier_counter: 0, - identifier_prefix: nil + use GenServer - def new(prefix) do - %__MODULE__{identifier_prefix: prefix} - end - - def canonical, do: new("_:c14n") + alias RDF.Canonicalization.IdentifierIssuer.State - @doc """ - Issues a new blank node identifier for a given existing blank node identifier. + # API - See - """ - def issue_identifier(issuer, existing_identifier) do - if issued_identifier = issuer.issued_identifiers[existing_identifier] do - {issued_identifier, issuer} - else - issued_identifier = issuer.identifier_prefix <> Integer.to_string(issuer.identifier_counter) - - {issued_identifier, - %{ - issuer - | issued_identifiers: - Map.put(issuer.issued_identifiers, existing_identifier, issued_identifier), - issue_order: [existing_identifier | issuer.issue_order], - identifier_counter: issuer.identifier_counter + 1 - }} - end + def start_link(state_or_prefix, opts \\ []) do + GenServer.start_link(__MODULE__, state_or_prefix, opts) end - def identifier(issuer, identifier) do - Map.get(issuer.issued_identifiers, identifier) + def stop(pid, reason \\ :normal, timeout \\ :infinity) do + GenServer.stop(pid, reason, timeout) end - def issued?(issuer, identifier) do - Map.has_key?(issuer.issued_identifiers, identifier) - end + def state(pid), do: GenServer.call(pid, :state) + def issue_identifier(pid, identifier), do: GenServer.call(pid, {:issue_identifier, identifier}) + def identifier(pid, identifier), do: GenServer.call(pid, {:identifier, identifier}) + def issued?(pid, identifier), do: GenServer.call(pid, {:issued?, identifier}) + def issued_identifiers(pid), do: GenServer.call(pid, :issued_identifiers) + + # Callbacks - def issued_identifiers(state) do - Enum.reverse(state.issue_order) + @impl true + def init(%State{} = state), do: {:ok, state} + def init(prefix), do: {:ok, State.new(prefix)} + + @impl true + def handle_call(:state, _, state), do: {:reply, state, state} + + def handle_call({:issue_identifier, identifier}, _, state) do + {issued_identifier, state} = State.issue_identifier(state, identifier) + {:reply, issued_identifier, state} end + + def handle_call({:identifier, identifier}, _, state), + do: {:reply, State.identifier(state, identifier), state} + + def handle_call({:issued?, identifier}, _, state), + do: {:reply, State.issued?(state, identifier), state} + + def handle_call(:issued_identifiers, _, state), + do: {:reply, State.issued_identifiers(state), state} end diff --git a/lib/rdf/canonicalization/identifier_issuer/state.ex b/lib/rdf/canonicalization/identifier_issuer/state.ex new file mode 100644 index 00000000..f8b76c3e --- /dev/null +++ b/lib/rdf/canonicalization/identifier_issuer/state.ex @@ -0,0 +1,52 @@ +defmodule RDF.Canonicalization.IdentifierIssuer.State do + @moduledoc """ + State of a `RDF.Canonicalization.IdentifierIssuer`. + + + """ + + defstruct issued_identifiers: %{}, + issue_order: [], + identifier_counter: 0, + identifier_prefix: nil + + def new(prefix) do + %__MODULE__{identifier_prefix: prefix} + end + + def canonical, do: new("_:c14n") + + @doc """ + Issues a new blank node identifier for a given existing blank node identifier. + + See + """ + def issue_identifier(state, existing_identifier) do + if issued_identifier = state.issued_identifiers[existing_identifier] do + {issued_identifier, state} + else + issued_identifier = state.identifier_prefix <> Integer.to_string(state.identifier_counter) + + {issued_identifier, + %{ + state + | issued_identifiers: + Map.put(state.issued_identifiers, existing_identifier, issued_identifier), + issue_order: [existing_identifier | state.issue_order], + identifier_counter: state.identifier_counter + 1 + }} + end + end + + def identifier(state, identifier) do + Map.get(state.issued_identifiers, identifier) + end + + def issued?(state, identifier) do + Map.has_key?(state.issued_identifiers, identifier) + end + + def issued_identifiers(state) do + Enum.reverse(state.issue_order) + end +end diff --git a/lib/rdf/canonicalization/identifier_issuer/supervisor.ex b/lib/rdf/canonicalization/identifier_issuer/supervisor.ex new file mode 100644 index 00000000..93716b38 --- /dev/null +++ b/lib/rdf/canonicalization/identifier_issuer/supervisor.ex @@ -0,0 +1,25 @@ +defmodule RDF.Canonicalization.IdentifierIssuer.Supervisor do + use DynamicSupervisor + + alias RDF.Canonicalization.IdentifierIssuer + + def start_link(init_arg \\ nil) do + DynamicSupervisor.start_link(__MODULE__, init_arg) + end + + @impl true + def init(_init_arg) do + DynamicSupervisor.init(strategy: :one_for_one, max_restarts: 0) + end + + def new_issuer(supervisor, prefix) do + {:ok, issuer_pid} = DynamicSupervisor.start_child(supervisor, {IdentifierIssuer, prefix}) + issuer_pid + end + + def copy_issuer(supervisor, issuer) do + state = IdentifierIssuer.state(issuer) + {:ok, issuer_pid} = DynamicSupervisor.start_child(supervisor, {IdentifierIssuer, state}) + issuer_pid + end +end diff --git a/lib/rdf/canonicalization/state.ex b/lib/rdf/canonicalization/state.ex index 8a1956fa..517ab0aa 100644 --- a/lib/rdf/canonicalization/state.ex +++ b/lib/rdf/canonicalization/state.ex @@ -10,7 +10,7 @@ defmodule RDF.Canonicalization.State do defstruct bnode_to_statements: nil, hash_to_bnodes: %{}, - canonical_issuer: IdentifierIssuer.canonical() + canonical_issuer: IdentifierIssuer.State.canonical() def new(input) do %__MODULE__{bnode_to_statements: bnode_to_statements(input)} @@ -18,7 +18,7 @@ defmodule RDF.Canonicalization.State do def issue_canonical_identifier(state, identifier) do {_issued_identifier, canonical_issuer} = - IdentifierIssuer.issue_identifier(state.canonical_issuer, identifier) + IdentifierIssuer.State.issue_identifier(state.canonical_issuer, identifier) %{state | canonical_issuer: canonical_issuer} end diff --git a/test/unit/canonicalization/identifier_issuer_test.exs b/test/unit/canonicalization/identifier_issuer_test.exs new file mode 100644 index 00000000..a771a173 --- /dev/null +++ b/test/unit/canonicalization/identifier_issuer_test.exs @@ -0,0 +1,29 @@ +defmodule RDF.Canonicalization.IdentifierIssuerTest do + use RDF.Test.Case + + doctest RDF.Canonicalization.IdentifierIssuer + + alias RDF.Canonicalization.IdentifierIssuer + + test "new_issuer/1" do + {:ok, sv_pid} = start_supervised(IdentifierIssuer.Supervisor) + + assert issuer1 = IdentifierIssuer.Supervisor.new_issuer(sv_pid, "issuer1") + assert is_pid(issuer1) + + assert IdentifierIssuer.state(issuer1) == IdentifierIssuer.State.new("issuer1") + assert IdentifierIssuer.issue_identifier(issuer1, "foo") == "issuer10" + assert IdentifierIssuer.issue_identifier(issuer1, "foo") == "issuer10" + assert IdentifierIssuer.identifier(issuer1, "foo") == "issuer10" + + assert issuer2 = IdentifierIssuer.Supervisor.new_issuer(sv_pid, "issuer2") + assert IdentifierIssuer.issue_identifier(issuer2, "foo") == "issuer20" + assert IdentifierIssuer.issue_identifier(issuer1, "foo") == "issuer10" + + assert issuer3 = IdentifierIssuer.Supervisor.copy_issuer(sv_pid, issuer1) + assert IdentifierIssuer.identifier(issuer3, "foo") == "issuer10" + assert IdentifierIssuer.issue_identifier(issuer3, "bar") == "issuer11" + assert IdentifierIssuer.issued?(issuer3, "bar") + refute IdentifierIssuer.issued?(issuer1, "bar") + end +end