int64_t modified_distance (unsigned order) const
{ // TODO(garretrieger): once priority is high enough, should try // setting distance = 0 which will force to sort immediately after // it's parent where possible.
/* * A topological sorting of an object graph. Ordered * in reverse serialization order (first object in the * serialization is at the end of the list). This matches * the 'packed' object stack used internally in the * serializer
*/ template<typename T>
graph_t (const T& objects)
: parents_invalid (true),
distance_invalid (true),
positions_invalid (true),
successful (true),
buffers ()
{
num_roots_for_space_.push (1); bool removed_nil = false;
vertices_.alloc (objects.length);
vertices_scratch_.alloc (objects.length); unsigned count = objects.length; for (unsigned i = 0; i < count; i++)
{ // If this graph came from a serialization buffer object 0 is the // nil object. We don't need it for our purposes here so drop it. if (i == 0 && !objects.arrayZ[i])
{
removed_nil = true; continue;
}
vertex_t* v = vertices_.push (); if (check_success (!vertices_.in_error ()))
v->obj = *objects.arrayZ[i];
unsigned root_idx () const
{ // Object graphs are in reverse order, the first object is at the end // of the vector. Since the graph is topologically sorted it's safe to // assume the first object has no incoming edges. return vertices_.length - 1;
}
/* * Adds a 16 bit link from parent_id to child_id
*/ template<typename T> void add_link (T* offset, unsigned parent_id, unsigned child_id)
{ auto& v = vertices_[parent_id]; auto* link = v.obj.real_links.push ();
link->width = 2;
link->objidx = child_id;
link->position = (char*) offset - (char*) v.obj.head;
vertices_[child_id].add_parent (parent_id);
}
/* * Generates a new topological sorting of graph ordered by the shortest * distance to each node if positions are marked as invalid.
*/ void sort_shortest_distance_if_needed ()
{ if (!positions_invalid) return;
sort_shortest_distance ();
}
/* * Generates a new topological sorting of graph ordered by the shortest * distance to each node.
*/ void sort_shortest_distance ()
{
positions_invalid = true;
if (vertices_.length <= 1) { // Graph of 1 or less doesn't need sorting. return;
}
hb_vector_t<unsigned> removed_edges; if (unlikely (!check_success (removed_edges.resize (vertices_.length)))) return;
update_parents ();
queue.insert (root ().modified_distance (0), root_idx ()); int new_id = root_idx (); unsigned order = 1; while (!queue.in_error () && !queue.is_empty ())
{ unsigned next_id = queue.pop_minimum().second;
sorted_graph[new_id] = std::move (vertices_[next_id]); const vertex_t& next = sorted_graph[new_id];
if (unlikely (!check_success(new_id >= 0))) { // We are out of ids. Which means we've visited a node more than once. // This graph contains a cycle which is not allowed.
DEBUG_MSG (SUBSET_REPACK, nullptr, "Invalid graph. Contains cycle."); return;
}
id_map[next_id] = new_id--;
for (constauto& link : next.obj.all_links ()) {
removed_edges[link.objidx]++; if (!(vertices_[link.objidx].incoming_edges () - removed_edges[link.objidx])) // Add the order that the links were encountered to the priority. // This ensures that ties between priorities objects are broken in a consistent // way. More specifically this is set up so that if a set of objects have the same // distance they'll be added to the topological order in the order that they are // referenced from the parent object.
queue.insert (vertices_[link.objidx].modified_distance (order++),
link.objidx);
}
}
if (!check_success (new_id == -1))
print_orphaned_nodes ();
}
/* * Finds the set of nodes (placed into roots) that should be assigned unique spaces. * More specifically this looks for the top most 24 bit or 32 bit links in the graph. * Some special casing is done that is specific to the layout of GSUB/GPOS tables.
*/ void find_space_roots (hb_set_t& visited, hb_set_t& roots)
{ int root_index = (int) root_idx (); for (int i = root_index; i >= 0; i--)
{ if (visited.has (i)) continue;
// Only real links can form 32 bit spaces for (auto& l : vertices_[i].obj.real_links)
{ if (l.is_signed || l.width < 3) continue;
if (i == root_index && l.width == 3) // Ignore 24bit links from the root node, this skips past the single 24bit // pointer to the lookup list. continue;
if (l.width == 3)
{ // A 24bit offset forms a root, unless there is 32bit offsets somewhere // in it's subgraph, then those become the roots instead. This is to make sure // that extension subtables beneath a 24bit lookup become the spaces instead // of the offset to the lookup.
hb_set_t sub_roots;
find_32bit_roots (l.objidx, sub_roots); if (sub_roots) { for (unsigned sub_root_idx : sub_roots) {
roots.add (sub_root_idx);
find_subgraph (sub_root_idx, visited);
} continue;
}
}
if (!r.table->sanitize (*(r.vertex), std::forward<Ts>(ds)...)) return vertex_and_table_t<T> ();
return r;
}
// Finds the object id of the object pointed to by the offset at 'offset' // within object[node_idx]. unsigned index_for_offset (unsigned node_idx, constvoid* offset) const
{ constauto& node = object (node_idx); if (offset < node.head || offset >= node.tail) return -1;
unsigned count = node.real_links.length; for (unsigned i = 0; i < count; i++)
{ // Use direct access for increased performance, this is a hot method. constauto& link = node.real_links.arrayZ[i]; if (offset != node.head + link.position) continue; return link.objidx;
}
return -1;
}
// Finds the object id of the object pointed to by the offset at 'offset' // within object[node_idx]. Ensures that the returned object is safe to mutate. // That is, if the original child object is shared by parents other than node_idx // it will be duplicated and the duplicate will be returned instead. unsigned mutable_index_for_offset (unsigned node_idx, constvoid* offset)
{ unsigned child_idx = index_for_offset (node_idx, offset); auto& child = vertices_[child_idx]; for (unsigned p : child.parents_iter ())
{ if (p != node_idx) { return duplicate (node_idx, child_idx);
}
}
return child_idx;
}
/* * Assign unique space numbers to each connected subgraph of 24 bit and/or 32 bit offset(s). * Currently, this is implemented specifically tailored to the structure of a GPOS/GSUB * (including with 24bit offsets) table.
*/ bool assign_spaces ()
{
update_parents ();
// Mark everything not in the subgraphs of the roots as visited. This prevents // subgraphs from being connected via nodes not in those subgraphs.
visited.invert ();
if (!roots) returnfalse;
while (roots)
{
uint32_t next = HB_SET_VALUE_INVALID; if (unlikely (!check_success (!roots.in_error ()))) break; if (!roots.next (&next)) break;
// TODO(grieger): special case for GSUB/GPOS use extension promotions to move 16 bit space // into the 32 bit space as needed, instead of using isolation.
}
returntrue;
}
/* * Isolates the subgraph of nodes reachable from root. Any links to nodes in the subgraph * that originate from outside of the subgraph will be removed by duplicating the linked to * object. * * Indices stored in roots will be updated if any of the roots are duplicated to new indices.
*/ bool isolate_subgraph (hb_set_t& roots)
{
update_parents ();
hb_map_t subgraph;
// incoming edges to root_idx should be all 32 bit in length so we don't need to de-dup these // set the subgraph incoming edge count to match all of root_idx's incoming edges
hb_set_t parents; for (unsigned root_idx : roots)
{
subgraph.set (root_idx, wide_parents (root_idx, parents));
find_subgraph (root_idx, subgraph);
} if (subgraph.in_error ()) returnfalse;
if (subgraph_incoming_edges < node.incoming_edges ())
{ // Only de-dup objects with incoming links from outside the subgraph.
made_changes = true;
duplicate_subgraph (entry.first, index_map);
}
}
if (in_error ()) returnfalse;
if (!made_changes) returnfalse;
if (original_root_idx != root_idx ()
&& parents.has (original_root_idx))
{ // If the root idx has changed since parents was determined, update root idx in parents
parents.add (root_idx ());
parents.del (original_root_idx);
}
// Update roots set with new indices as needed. for (auto next : roots)
{ const uint32_t *v; if (index_map.has (next, &v))
{
roots.del (next);
roots.add (*v);
}
}
constauto& o = vertices_[node_idx].obj;
size_t size = o.tail - o.head; if (max_depth == 0) return size;
for (constauto& link : o.all_links ())
size += find_subgraph_size (link.objidx, subgraph, max_depth - 1); return size;
}
/* * Finds the topmost children of 32bit offsets in the subgraph starting * at node_idx. Found indices are placed into 'found'.
*/ void find_32bit_roots (unsigned node_idx, hb_set_t& found)
{ for (constauto& link : vertices_[node_idx].obj.all_links ())
{ if (!link.is_signed && link.width == 4) {
found.add (link.objidx); continue;
}
find_32bit_roots (link.objidx, found);
}
}
/* * Moves the child of old_parent_idx pointed to by old_offset to a new * vertex at the new_offset.
*/ template<typename O> void move_child (unsigned old_parent_idx, const O* old_offset, unsigned new_parent_idx, const O* new_offset)
{
distance_invalid = true;
positions_invalid = true;
/* * duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign * links. index_map is updated with mappings from old id to new id. If a duplication has already * been performed for a given index, then it will be skipped.
*/ void duplicate_subgraph (unsigned node_idx, hb_map_t& index_map)
{ if (index_map.has (node_idx)) return;
// The last object is the root of the graph, so swap back the root to the end. // The root's obj idx does change, however since it's root nothing else refers to it. // all other obj idx's will be unaffected.
hb_swap (vertices_[vertices_.length - 2], *clone);
// Since the root moved, update the parents arrays of all children on the root. for (constauto& l : root ().obj.all_links ())
vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
return clone_idx;
}
/* * Creates a copy of child and re-assigns the link from * parent to the clone. The copy is a shallow copy, objects * linked from child are not duplicated. * * Returns the index of the newly created duplicate. * * If the child_idx only has incoming edges from parent_idx, this * will do nothing and return the original child_idx.
*/ unsigned duplicate_if_shared (unsigned parent_idx, unsigned child_idx)
{ unsigned new_idx = duplicate (parent_idx, child_idx); if (new_idx == (unsigned) -1) return child_idx; return new_idx;
}
/* * Creates a copy of child and re-assigns the link from * parent to the clone. The copy is a shallow copy, objects * linked from child are not duplicated. * * Returns the index of the newly created duplicate. * * If the child_idx only has incoming edges from parent_idx, * duplication isn't possible and this will return -1.
*/ unsigned duplicate (unsigned parent_idx, unsigned child_idx)
{
update_parents ();
if (child.incoming_edges () <= links_to_child)
{ // Can't duplicate this node, doing so would orphan the original one as all remaining links // to child are from parent.
DEBUG_MSG (SUBSET_REPACK, nullptr, " Not duplicating %u => %u",
parent_idx, child_idx); return -1;
}
unsigned clone_idx = duplicate (child_idx); if (clone_idx == (unsigned) -1) return -1; // duplicate shifts the root node idx, so if parent_idx was root update it. if (parent_idx == clone_idx) parent_idx++;
auto& parent = vertices_[parent_idx]; for (auto& l : parent.obj.all_links_writer ())
{ if (l.objidx != child_idx) continue;
reassign_link (l, parent_idx, clone_idx);
}
return clone_idx;
}
/* * Creates a copy of child and re-assigns the links from * parents to the clone. The copy is a shallow copy, objects * linked from child are not duplicated. * * Returns the index of the newly created duplicate. * * If the child_idx only has incoming edges from parents, * duplication isn't possible or duplication fails and this will * return -1.
*/ unsigned duplicate (const hb_set_t* parents, unsigned child_idx)
{ if (parents->is_empty()) { return -1;
}
if (child.incoming_edges () <= links_to_child)
{ // Can't duplicate this node, doing so would orphan the original one as all remaining links // to child are from parent.
DEBUG_MSG (SUBSET_REPACK, nullptr, " Not duplicating %u, ..., %u => %u", first_parent, last_parent, child_idx); return -1;
}
for (unsigned parent_idx : *parents) { // duplicate shifts the root node idx, so if parent_idx was root update it. if (parent_idx == clone_idx) parent_idx++; auto& parent = vertices_[parent_idx]; for (auto& l : parent.obj.all_links_writer ())
{ if (l.objidx != child_idx) continue;
reassign_link (l, parent_idx, clone_idx);
}
}
return clone_idx;
}
/* * Adds a new node to the graph, not connected to anything.
*/ unsigned new_node (char* head, char* tail)
{
positions_invalid = true;
distance_invalid = true;
// The last object is the root of the graph, so swap back the root to the end. // The root's obj idx does change, however since it's root nothing else refers to it. // all other obj idx's will be unaffected.
hb_swap (vertices_[vertices_.length - 2], *clone);
// Since the root moved, update the parents arrays of all children on the root. for (constauto& l : root ().obj.all_links ())
vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
return clone_idx;
}
/* * Raises the sorting priority of all children.
*/ bool raise_childrens_priority (unsigned parent_idx)
{
DEBUG_MSG (SUBSET_REPACK, nullptr, " Raising priority of all children of %u",
parent_idx); // This operation doesn't change ordering until a sort is run, so no need // to invalidate positions. It does not change graph structure so no need // to update distances or edge counts. auto& parent = vertices_[parent_idx].obj; bool made_change = false; for (auto& l : parent.all_links_writer ())
made_change |= vertices_[l.objidx].raise_priority (); return made_change;
}
bool is_fully_connected ()
{
update_parents();
if (root().incoming_edges ()) // Root cannot have parents. returnfalse;
for (unsigned i = 0; i < root_idx (); i++)
{ if (!vertices_[i].incoming_edges ()) returnfalse;
} returntrue;
}
#if 0 /* * Saves the current graph to a packed binary format which the repacker fuzzer takes * as a seed.
*/ void save_fuzzer_seed (hb_tag_t tag) const
{
FILE* f = fopen ("./repacker_fuzzer_seed", "w");
fwrite ((void*) &tag, sizeof (tag), 1, f);
for (unsigned i = 0; i < vertices_.length; i++)
{ for (constauto& l : vertices_[i].obj.real_links)
{
link_t link {
(uint16_t) i, (uint16_t) l.objidx,
(uint16_t) l.position, (uint8_t) l.width
};
fwrite ((void*) &link, sizeof (link), 1, f);
}
}
fclose (f);
} #endif
void print_orphaned_nodes ()
{ if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
parents_invalid = true;
update_parents();
if (root().incoming_edges ()) {
DEBUG_MSG (SUBSET_REPACK, nullptr, "Root node has incoming edges.");
}
for (unsigned i = 0; i < root_idx (); i++)
{ constauto& v = vertices_[i]; if (!v.incoming_edges ())
DEBUG_MSG (SUBSET_REPACK, nullptr, "Node %u is orphaned.", i);
}
}
public: /* * Creates a map from objid to # of incoming edges.
*/ void update_parents ()
{ if (!parents_invalid) return;
unsigned count = vertices_.length;
for (unsigned i = 0; i < count; i++)
vertices_.arrayZ[i].reset_parents ();
for (unsigned p = 0; p < count; p++)
{ for (auto& l : vertices_.arrayZ[p].obj.all_links ())
vertices_[l.objidx].add_parent (p);
}
for (unsigned i = 0; i < count; i++) // parents arrays must be accurate or downstream operations like cycle detection // and sorting won't work correctly.
check_success (!vertices_.arrayZ[i].in_error ());
parents_invalid = false;
}
/* * compute the serialized start and end positions for each vertex.
*/ void update_positions ()
{ if (!positions_invalid) return;
unsigned current_pos = 0; for (int i = root_idx (); i >= 0; i--)
{ auto& v = vertices_[i];
v.start = current_pos;
current_pos += v.obj.tail - v.obj.head;
v.end = current_pos;
}
positions_invalid = false;
}
/* * Finds the distance to each object in the graph * from the initial node.
*/ void update_distances ()
{ if (!distance_invalid) return;
// Uses Dijkstra's algorithm to find all of the shortest distances. // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm // // Implementation Note: // Since our priority queue doesn't support fast priority decreases // we instead just add new entries into the queue when a priority changes. // Redundant ones are filtered out later on by the visited set. // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf // for practical performance this is faster then using a more advanced queue // (such as a fibonacci queue) with a fast decrease priority. unsigned count = vertices_.length; for (unsigned i = 0; i < count; i++)
vertices_.arrayZ[i].distance = hb_int_max (int64_t);
vertices_.tail ().distance = 0;
private: /* * Updates a link in the graph to point to a different object. Corrects the * parents vector on the previous and new child nodes.
*/ void reassign_link (hb_serialize_context_t::object_t::link_t& link, unsigned parent_idx, unsigned new_idx)
{ unsigned old_idx = link.objidx;
link.objidx = new_idx;
vertices_[old_idx].remove_parent (parent_idx);
vertices_[new_idx].add_parent (parent_idx);
}
/* * Updates all objidx's in all links using the provided mapping. Corrects incoming edge counts.
*/ template<typename Iterator, hb_requires (hb_is_iterator (Iterator))> void remap_obj_indices (const hb_map_t& id_map,
Iterator subgraph, bool only_wide = false)
{ if (!id_map) return; for (unsigned i : subgraph)
{ for (auto& link : vertices_[i].obj.all_links_writer ())
{ const uint32_t *v; if (!id_map.has (link.objidx, &v)) continue; if (only_wide && !(link.width == 4 && !link.is_signed)) continue;
reassign_link (link, i, *v);
}
}
}
/* * Updates all objidx's in all links using the provided mapping.
*/ bool remap_all_obj_indices (const hb_vector_t<unsigned>& id_map,
hb_vector_t<vertex_t>* sorted_graph) const
{ unsigned count = sorted_graph->length; for (unsigned i = 0; i < count; i++)
{ if (!(*sorted_graph)[i].remap_parents (id_map)) returnfalse; for (auto& link : sorted_graph->arrayZ[i].obj.all_links_writer ())
{
link.objidx = id_map[link.objidx];
}
} returntrue;
}
/* * Finds all nodes in targets that are reachable from start_idx, nodes in visited will be skipped. * For this search the graph is treated as being undirected. * * Connected targets will be added to connected and removed from targets. All visited nodes * will be added to visited.
*/ void find_connected_nodes (unsigned start_idx,
hb_set_t& targets,
hb_set_t& visited,
hb_set_t& connected)
{ if (unlikely (!check_success (!visited.in_error ()))) return; if (visited.has (start_idx)) return;
visited.add (start_idx);
if (targets.has (start_idx))
{
targets.del (start_idx);
connected.add (start_idx);
}
constauto& v = vertices_[start_idx];
// Graph is treated as undirected so search children and parents of start_idx for (constauto& l : v.obj.all_links ())
find_connected_nodes (l.objidx, targets, visited, connected);
for (unsigned p : v.parents_iter ())
find_connected_nodes (p, targets, visited, connected);
}
public: // TODO(garretrieger): make private, will need to move most of offset overflow code into graph.
hb_vector_t<vertex_t> vertices_;
hb_vector_t<vertex_t> vertices_scratch_; private: bool parents_invalid; bool distance_invalid; bool positions_invalid; bool successful;
hb_vector_t<unsigned> num_roots_for_space_;
hb_vector_t<char*> buffers;
};
}
#endif// GRAPH_GRAPH_HH
Messung V0.5
¤ Dauer der Verarbeitung: 0.23 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.