# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Extract histogram names from the description XML file.
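
For more information on the format of the XML file, which is self-documenting,
see histograms.xml; however, here is a simple example to get you started. The
XML below will generate the following five histograms:

    HistogramTime
    HistogramEnum
    HistogramEnum_Chrome
    HistogramEnum_IE
    HistogramEnum_Firefox

<histogram-configuration>

<histograms>

<histogram name="HistogramTime" units="milliseconds">
  <summary>A brief description.</summary>
  <details>This is a more thorough description of this histogram.</details>
</histogram>

<histogram name="HistogramEnum" enum="MyEnumType">
  <summary>This histogram sports an enum value type.</summary>
</histogram>

</histograms>

<enums>

<enum name="MyEnumType" type="int">
  <summary>This is an example enum type, where the values mean little.</summary>
  <int value="1" label="FIRST_VALUE">This is the first value.</int>
  <int value="2" label="SECOND_VALUE">This is the second value.</int>
</enum>

</enums>

<fieldtrials>

<fieldtrial name="BrowserType">
  <group name="Chrome"/>
  <group name="IE"/>
  <group name="Firefox"/>
  <affected-histogram name="HistogramEnum"/>
</fieldtrial>

</fieldtrials>

</histogram-configuration>
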
"""
import copy
import logging
import xml.dom.minidom
# Maximum number of times a field trial whose affected histograms are not yet
# known may be put back on the reprocess queue before it is reported as an
# error.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
def _JoinChildNodes(tag):
"""Join child nodes into a single text.
Applicable to leafs like 'summary' and 'detail'.
Args:
tag: parent node
Returns:
a string with concatenated nodes' text representation.
"""
return ''.join(c.toxml() for c in tag.childNodes).strip()
def _NormalizeString(s):
"""Normalizes a string (possibly of multiple lines) by replacing each
whitespace sequence with a single space.
Args:
s: The string to normalize, e.g. ' \n a b c\n d '
Returns:
The normalized string, e.g. 'a b c d'
"""
return ' '.join(s.split())
def _NormalizeAllAttributeValues(node):
  """Normalize the whitespace of every attribute value in a tree, in place.

  The node itself is handled first (when it is an element), then each of its
  children recursively.

  Args:
    node: The minidom node at the root of the tree to normalize.

  Returns:
    The same minidom node that was passed in, after normalization.
  """
  if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    for attribute in node.attributes.values():
      attribute.value = _NormalizeString(attribute.value)
  for child in node.childNodes:
    _NormalizeAllAttributeValues(child)
  return node
def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
"""Creates a new histogram name based on the field trial group.
Args:
group_name: The name of the field trial group. May be empty.
histogram_name: The name of the histogram. May be of the form
Group.BaseName or BaseName
field_trial: The FieldTrial XML element.
Returns:
A string with the expanded histogram name.
Raises:
Error if the expansion can't be done.
"""
if fieldtrial.hasAttribute('separator'):
separator = fieldtrial.getAttribute('separator')
else:
separator = '_'
if fieldtrial.hasAttribute('ordering'):
ordering = fieldtrial.getAttribute('ordering')
else:
ordering = 'suffix'
if ordering not in ['prefix', 'suffix']:
logging.error('ordering needs to be prefix or suffix, value is %s' %
ordering)
raise Error()
if not group_name:
return histogram_name
if ordering == 'suffix':
return histogram_name + separator + group_name
# For prefixes, the group_name is inserted between the "cluster" and the
# "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
sections = histogram_name.split('.')
if len(sections) <= 1:
logging.error(
'Prefix Field Trial expansions require histogram names which include a '
'dot separator. Histogram name is %s, and Field Trial is %s' %
(histogram_name, fieldtrial.getAttribute('name')))
raise Error()
cluster = sections[0] + '.'
remainder = '.'.join(sections[1:])
return cluster + group_name + separator + remainder
def _ExtractEnumsFromXmlTree(tree):
"""Extract all nodes in the tree into a dictionary."""
enums = {}
have_errors = False
last_name = None
for enum in tree.getElementsByTagName("enum"):
if enum.getAttribute('type') != 'int':
logging.error('Unknown enum type %s' % enum.getAttribute('type'))
have_errors = True
continue
name = enum.getAttribute('name')
if last_name is not None and name.lower() < last_name.lower():
logging.error('Enums %s and %s are not in alphabetical order'
% (last_name, name))
have_errors = True
last_name = name
if name in enums:
logging.error('Duplicate enum %s' % name)
have_errors = True
continue
last_int_value = None
enum_dict = {}
enum_dict['name'] = name
enum_dict['values'] = {}
for int_tag in enum.getElementsByTagName("int"):
value_dict = {}
int_value = int(int_tag.getAttribute('value'))
if last_int_value is not None and int_value < last_int_value:
logging.error('Enum %s int values %d and %d are not in numerical order'
% (name, last_int_value, int_value))
have_errors = True
last_int_value = int_value
if int_value in enum_dict['values']:
logging.error('Duplicate enum value %d for enum %s' % (int_value, name))
have_errors = True
continue
value_dict['label'] = int_tag.getAttribute('label')
value_dict['summary'] = _JoinChildNodes(int_tag)
enum_dict['values'][int_value] = value_dict
summary_nodes = enum.getElementsByTagName("summary")
if len(summary_nodes) > 0:
enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))
enums[name] = enum_dict
return enums, have_errors
def _ExtractHistogramsFromXmlTree(tree, enums):
"""Extract all nodes in the tree into a dictionary."""
# Process the histograms. The descriptions can include HTML tags.
histograms = {}
have_errors = False
last_name = None
for histogram in tree.getElementsByTagName("histogram"):
name = histogram.getAttribute('name')
if last_name is not None and name.lower() < last_name.lower():
logging.error('Histograms %s and %s are not in alphabetical order'
% (last_name, name))
have_errors = True
last_name = name
if name in histograms:
logging.error('Duplicate histogram definition %s' % name)
have_errors = True
continue
histograms[name] = histogram_entry = {}
# Find tag.
summary_nodes = histogram.getElementsByTagName("summary")
if len(summary_nodes) > 0:
histogram_entry['summary'] = _NormalizeString(
_JoinChildNodes(summary_nodes[0]))
else:
histogram_entry['summary'] = 'TBD'
# Find tag.
obsolete_nodes = histogram.getElementsByTagName("obsolete")
if len(obsolete_nodes) > 0:
reason = _JoinChildNodes(obsolete_nodes[0])
histogram_entry['obsolete'] = reason
# Handle units.
if histogram.hasAttribute('units'):
histogram_entry['units'] = histogram.getAttribute('units')
# Find tag.
details_nodes = histogram.getElementsByTagName("details")
if len(details_nodes) > 0:
histogram_entry['details'] = _NormalizeString(
_JoinChildNodes(details_nodes[0]))
# Handle enum types.
if histogram.hasAttribute('enum'):
enum_name = histogram.getAttribute('enum')
if not enum_name in enums:
logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
have_errors = True
else:
histogram_entry['enum'] = enums[enum_name]
return histograms, have_errors
def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
"""Process field trials' tags and combine with affected histograms.
The histograms dictionary will be updated in-place by adding new histograms
created by combining histograms themselves with field trials targetting these
histograms.
Args:
tree: XML dom tree.
histograms: a dictinary of histograms previously extracted from the tree;
Returns:
True if any errors were found.
"""
have_errors = False
# Verify order of fieldtrial fields first.
last_name = None
for fieldtrial in tree.getElementsByTagName("fieldtrial"):
name = fieldtrial.getAttribute('name')
if last_name is not None and name.lower() < last_name.lower():
logging.error('Field trials %s and %s are not in alphabetical order'
% (last_name, name))
have_errors = True
last_name = name
# Field trials can depend on other field trials, so we need to be careful.
# Make a temporary copy of the list of field trials to use as a queue.
# Field trials whose dependencies have not yet been processed will get
# relegated to the back of the queue to be processed later.
reprocess_queue = []
def GenerateFieldTrials():
for f in tree.getElementsByTagName("fieldtrial"): yield 0, f
for r, f in reprocess_queue: yield r, f
for reprocess_count, fieldtrial in GenerateFieldTrials():
# Check dependencies first
dependencies_valid = True
affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
for affected_histogram in affected_histograms:
histogram_name = affected_histogram.getAttribute('name')
if not histogram_name in histograms:
# Base histogram is missing
dependencies_valid = False
missing_dependency = histogram_name
break
if not dependencies_valid:
if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
reprocess_queue.append( (reprocess_count + 1, fieldtrial) )
continue
else:
logging.error('Field trial %s is missing its dependency %s'
% (fieldtrial.getAttribute('name'),
missing_dependency))
have_errors = True
continue
name = fieldtrial.getAttribute('name')
groups = fieldtrial.getElementsByTagName('group')
group_labels = {}
for group in groups:
group_labels[group.getAttribute('name')] = group.getAttribute('label')
last_histogram_name = None
for affected_histogram in affected_histograms:
histogram_name = affected_histogram.getAttribute('name')
if (last_histogram_name is not None
and histogram_name.lower() < last_histogram_name.lower()):
logging.error('Affected histograms %s and %s of field trial %s are not '
'in alphabetical order'
% (last_histogram_name, histogram_name, name))
have_errors = True
last_histogram_name = histogram_name
base_description = histograms[histogram_name]
with_groups = affected_histogram.getElementsByTagName('with-group')
if len(with_groups) > 0:
histogram_groups = with_groups
else:
histogram_groups = groups
for group in histogram_groups:
group_name = group.getAttribute('name')
try:
new_histogram_name = _ExpandHistogramNameWithFieldTrial(
group_name, histogram_name, fieldtrial)
if new_histogram_name != histogram_name:
histograms[new_histogram_name] = copy.deepcopy(
histograms[histogram_name])
group_label = group_labels.get(group_name, '')
if not 'fieldtrial_groups' in histograms[new_histogram_name]:
histograms[new_histogram_name]['fieldtrial_groups'] = []
histograms[new_histogram_name]['fieldtrial_groups'].append(group_name)
if not 'fieldtrial_names' in histograms[new_histogram_name]:
histograms[new_histogram_name]['fieldtrial_names'] = []
histograms[new_histogram_name]['fieldtrial_names'].append(name)
if not 'fieldtrial_labels' in histograms[new_histogram_name]:
histograms[new_histogram_name]['fieldtrial_labels'] = []
histograms[new_histogram_name]['fieldtrial_labels'].append(
group_label)
except Error:
have_errors = True
return have_errors
def ExtractHistogramsFromFile(file_handle):
  """Build the histogram descriptions from an XML stream.

  The stream is parsed, all attribute values are whitespace-normalized, and
  then enums, histograms and field trial expansions are extracted in that
  order.

  Args:
    file_handle: A file or file-like object with XML content.

  Returns:
    A tuple of (histograms, status) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and
    status is a boolean indicating if errors were encountered in processing.
  """
  dom = _NormalizeAllAttributeValues(xml.dom.minidom.parse(file_handle))
  enums, enum_errors = _ExtractEnumsFromXmlTree(dom)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(dom, enums)
  update_errors = _UpdateHistogramsWithFieldTrialInformation(dom, histograms)
  had_errors = enum_errors or histogram_errors or update_errors
  return histograms, had_errors
def ExtractHistograms(filename):
  """Load histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    A dictionary mapping histogram names to dictionaries containing the
    histogram descriptions.

  Raises:
    Error: if the file is not well-formatted.
  """
  # Binary mode hands the raw bytes to the XML parser, so the encoding named
  # in the XML declaration is honoured instead of the locale's default text
  # encoding.
  with open(filename, 'rb') as f:
    histograms, had_errors = ExtractHistogramsFromFile(f)
  if had_errors:
    message = 'Error parsing %s' % filename
    logging.error(message)
    raise Error(message)
  return histograms
def ExtractNames(histograms):
  """Return the names of all histograms in sorted order.

  Args:
    histograms: A dictionary keyed by histogram name.

  Returns:
    A new sorted list of the dictionary's keys.
  """
  return sorted(histograms)