Title: Benchmarking Perl - normal taint support vs. NO_TAINT_SUPPORT
Author: Steffen Schwigon (renormalist, Dresden Perl Mongers)
Date: 2021-09-16
Version: 1
In this evaluation we compare the performance of a "normal" Perl built with taint support to a Perl built with -DNO_TAINT_SUPPORT/-DSILENT_NO_TAINT_SUPPORT.
The approach is described here.
The toolchain and philosophy used are those of Perl::Formance [1], [2].
The results are stored in a BenchmarkAnything database. The evaluation is done in Jupyter with the BenchmarkAnything support libraries.
With very few exceptions or outliers, Perl built with -DNO_TAINT_SUPPORT is generally about 1% to 5% faster than Perl with taint support for average real-world code. There are some more extreme results where the benchmarks are special:
Footnote:
import os
import sys
import pprint
from IPython.display import display, Markdown
# Put ../lib on the import path before the project imports that follow
# (presumably where benchmarkanything / benchmarkvis live -- confirm).
# The path is relative to the current working directory, so the notebook
# must be started from its own directory for this to resolve as intended.
module_path = os.path.abspath('../lib')
if module_path not in sys.path:
    # Guard against duplicate entries when the cell is re-run.
    sys.path.append(module_path)
from benchmarkanything import Benchmark, Query, Spec
from benchmarkvis import MatplotlibPlotter, BokehPlotter
# --- Notebook configuration ------------------------------------------------

# Shared pretty printer used for diagnostic messages.
pp = pprint.PrettyPrinter(indent=4)

# Search endpoint that the benchmark queries are posted to.
QA = 'http://qa:7360/api/v1/search'

# Hostname -> CPU description; the entry for HOST becomes the report heading.
HW = {
    'ss5z': 'i7-10610U CPU @ 1.80GHz',
}

# Value the `env_perlformance_qualid` field is filtered on.
QUALID = 'notaint-2021-a'

# Value the `sysinfo_hostname` field is filtered on; also the key into HW.
HOST = 'ss5z'

# Values of `perlconfig_derived_notaintsupport` selecting the two Perl builds.
NOTAINT = '1'
WITHTAINT = '0'

# Row limit passed to the base query.
LIMIT = 10000

# Statistics requested from Spec. The report code indexes the evaluated
# result by position (0..5), presumably in this order -- confirm against
# Spec.evaluate before reordering.
STATS_SPEC = [
    'min',
    'max',
    'mean',
    'stddev',
    'ci_95l',
    'ci_95u',
]
# Benchmark metric names to fetch and report, in report order.
METRICS = [
    # start with fastest
    "perlformance.perl5.SpamAssassin.salearn.ham",
    "perlformance.perl5.SpamAssassin.salearn.ham2",
    "perlformance.perl5.SpamAssassin.salearn.spam",
    "perlformance.perl5.SpamAssassin.salearn.spam2",
    "perlformance.perl5.PerlStone2015.binarytrees",
    "perlformance.perl5.PerlStone2015.mandelbrot",
    "perlformance.perl5.Mandelbrot.withthreads",
    "perlformance.perl5.Mandelbrot.withmce",
    "perlformance.perl5.PerlStone2015.fib",
    "perlformance.perl5.Fib",
    "perlformance.perl5.FibMoose",
    "perlformance.perl5.FibMouse",
    "perlformance.perl5.FibOO",
    "perlformance.perl5.FibOOSig",
    "perlformance.perl5.AccessorsHash.set",
    "perlformance.perl5.AccessorsArray.set",
    "perlformance.perl5.AccessorsMoose.set",
    "perlformance.perl5.AccessorsMouse.set",
    "perlformance.perl5.AccessorsClassAccessor.set",
    "perlformance.perl5.DPath.dpath",
    "perlformance.perl5.PerlStone2015.fannkuch",
    "perlformance.perl5.PerlStone2015.fasta",
    "perlformance.perl5.PerlStone2015.regexdna",
    "perlformance.perl5.Threads.threadstorm",
    "perlformance.perl5.ThreadsShared.threadstorm",
    "perlformance.perl5.Mem.allocate",
    "perlformance.perl5.Mem.copy",

    # fast but micro benchmarks
    "perlformance.perl5.PerlStone2015.09data.a_alloc",
    "perlformance.perl5.PerlStone2015.09data.a_copy",
    "perlformance.perl5.PerlStone2015.07lists.unshift",
    "perlformance.perl5.PerlStone2015.07lists.push",

    # slower
    "perlformance.perl5.PerlStone2015.spectralnorm",
    "perlformance.perl5.PerlStone2015.nbody",
    "perlformance.perl5.PerlStone2015.01overview.opmix1",
    "perlformance.perl5.PerlStone2015.01overview.opmix2",
    "perlformance.perl5.PerlStone2015.04control.blocks1",
    "perlformance.perl5.PerlStone2015.04control.blocks2",
    "perlformance.perl5.PerlStone2015.05regex.fixedstr",
    "perlformance.perl5.PerlStone2015.regex.backtrack",
    "perlformance.perl5.PerlStone2015.regex.code_literal",
    "perlformance.perl5.PerlStone2015.regex.code_runtime",
    "perlformance.perl5.PerlStone2015.regex.precomp_access",
    "perlformance.perl5.PerlStone2015.regex.runtime_comp",
    "perlformance.perl5.PerlStone2015.regex.runtime_comp_nocache",
    "perlformance.perl5.PerlStone2015.regex.split1",
    "perlformance.perl5.PerlStone2015.regex.split2",
    "perlformance.perl5.PerlStone2015.regex.splitratio",
    "perlformance.perl5.PerlStone2015.regex.trie_limit",
    "perlformance.perl5.MatrixReal.matrix_times_itself.030",
]
# The two Perl builds being compared. Each entry is
# (series label, extra `where` constraints appended to the per-metric query).
METRIC_VARIANTS = [
    (
        'withtaint',
        [['=', 'perlconfig_derived_notaintsupport', WITHTAINT]],
    ),
    (
        'NO_TAINT_SUPPORT',
        [['=', 'perlconfig_derived_notaintsupport', NOTAINT]],
    ),
]
# Query template shared by every metric/variant: it fixes the selected
# fields, restricts results to the QUALID qualification run and caps the
# row count at LIMIT. Per-metric variants are derived from it later.
base_query = Query(
    select=[
        'NAME',
        'VALUE',
        'CREATED',
        'perlconfig_derived_notaintsupport',
        'env_perlformance_qualid',
        'sysinfo_hostname',
    ],
    where=[
        ['=', 'env_perlformance_qualid', QUALID],
    ],
    limit=LIMIT,
)
# Module-level state filled in by fetchdata():
#   bmx   -- metric name -> Benchmark holding one data series per variant
#   stats -- metric name -> {variant name -> evaluated statistics}
# vis2 is only initialised here; visualize() binds its own local `vis2`.
vis2, bmx, stats = {}, {}, {}
def fetchdata():
    """Fetch every metric/variant data series and compute its statistics.

    For each name in ``METRICS`` a ``Benchmark`` is stored in the
    module-level ``bmx`` dict and an empty dict in ``stats``. For each
    ``(variant_name, constraints)`` pair in ``METRIC_VARIANTS`` a variant of
    ``base_query`` -- restricted to the metric name, to ``HOST`` and to the
    variant's own constraints -- is posted to ``QA``. The returned data is
    added to the benchmark as a series named after the variant and then
    evaluated with a ``Spec`` built from ``STATS_SPEC``; the result is stored
    in ``stats[metric][variant_name]``.

    Statistics evaluation is best effort: if it raises, a diagnostic is
    printed and ``stats`` simply gets no entry for that metric/variant.
    Errors from the query itself are not caught.
    """
    for metric in METRICS:
        bmx[metric] = Benchmark(metric)
        stats[metric] = {}

        for variant_name, variant_constraints in METRIC_VARIANTS:
            query = base_query.variant(
                where=[
                    ['=', 'NAME', metric],
                    ['=', 'sysinfo_hostname', HOST],
                ]
                + variant_constraints
            )
            query_data = query.post(QA)
            bmx[metric].add_data_series(variant_name, query_data)

            # --- statistical summary ---
            stats_spec = Spec(STATS_SPEC, [])
            try:
                s = stats_spec.evaluate(query_data)
            except Exception as e:
                # The format arguments must be a tuple. The previous
                # `str(metric, variant_name, e)` raised TypeError inside the
                # handler, turning a skippable failure into a crash.
                pp.pprint("# Error while getting stats (%s, %s): %s"
                          % (metric, variant_name, e))
                continue
            stats[metric][variant_name] = s
def visualize(metric, type):
    """Plot the benchmark stored for *metric* with a fresh MatplotlibPlotter.

    *type* selects the plot: 'boxplot', 'percentiles', 'histogram' or
    'timeline'. Any other value draws nothing, but the plotter is still
    started and shown. The benchmark is read from the module-level ``bmx``
    dict, so the metric must already have been stored there.
    """
    axis_label = 'duration (seconds, smaller=better)'
    plotter = MatplotlibPlotter()
    plotter.start()
    # Each supported type corresponds to a plotter method of the same name.
    if type in ('boxplot', 'percentiles', 'histogram', 'timeline'):
        draw = getattr(plotter, type)
        draw(bmx[metric], label=axis_label)
    plotter.show()
# --- Report ------------------------------------------------------------------
# Fetch all data once, then emit per metric: a heading, one statistics line
# per variant, the relative speed difference, and the plots.
fetchdata()

display(Markdown('## ' + HW[HOST]))

for metric in METRICS:
    display(Markdown('### ' + metric))

    text = ''
    text = text + '* min .. ci95l .. **mean** .. ci95u .. max (stdev)\n'

    for variant_name, _constraints in METRIC_VARIANTS:
        try:
            s = stats[metric][variant_name]
        except KeyError as e:
            # No statistics were stored for this metric/variant; report it
            # and leave the variant out of the summary. The format arguments
            # must be a tuple -- the previous `str(metric, variant_name, e)`
            # raised TypeError inside the handler.
            pp.pprint("# No data for stats (%s, %s): %s"
                      % (metric, variant_name, e))
            continue
        # Positions are used as: 0=min, 4=ci95l, 2=mean, 5=ci95u, 1=max,
        # 3=stddev, matching the order of the line's own labels.
        text = text + (
            '* %.2f .. %.2f .. **%.2f** .. %.2f .. %.2f (stdev: %.2f) -- %s\n'
            % (s[0], s[4], s[2], s[5], s[1], s[3], variant_name)
        )

    try:
        # Mean duration without taint support as a percentage of the mean
        # with taint support; below 100 means NO_TAINT_SUPPORT is faster.
        rel_diff = 100 * \
            stats[metric]['NO_TAINT_SUPPORT'][2] / stats[metric]['withtaint'][2]
        improvement = 100 - rel_diff
        faster_or_slower = '*faster*'
        if improvement < 0:
            faster_or_slower = '**slower!**'
        text = text + '* <u>%.2f%% (%s)</u>\n' % (improvement, faster_or_slower)
    except Exception as e:
        # Best effort: a missing variant or a zero mean only drops the
        # comparison line. Same tuple fix as above for the format arguments.
        pp.pprint("# No stats for diff (%s): %s" % (metric, e))

    display(Markdown(text))
    visualize(metric, 'boxplot')
    visualize(metric, 'percentiles')
    #visualize(metric, 'histogram')
    #visualize(metric, 'timeline')
    #display(Markdown('<span style="page-break-before: always !important">⚀</span>')) # doesn't help