I spent a little bit of time looking into Python string concatenation times. I learned that newer versions of Python implement an optimzation for the plus equals operator. In the future I'd like to know what this optimization is. It seemed simple enough to escape by introducing an intermediate variable. Anyway, here is the code:
Method 1 - Optimized in CPython
def concat1(times):
val = ""
for _ in range(0, times):
val += s
return val
Method 2 - Not optimized in CPython
def concat2(times):
val = ""
for _ in range(0, times):
val2 = val + s
val = val2
return val
Method 3
def concat3(times):
val = []
for _ in range(0, times):
val.append(s)
return ''.join(s)
I ran these functions with timeit
and produced this graph:
Here is all the code:
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# The purpose of this file is to measure the time it takes to concatenate strings
# rather, the big-O runtime of different methods
from timeit import Timer
def main():
"""
The main function
"""
# If any observation exceeds this value then terminate the experiment
observation_limit = 2
step = 1000
data_points = 100
fns = [concat1, concat2, concat3]
for i in range(0, data_points * step, step):
times = [Timer(lambda: fn(i)).timeit(number=5) for fn in fns]
time_strings = ",".join([str(t) for t in times])
print("{},{}".format(i, time_strings))
if any([t > observation_limit for t in times]):
break
s = "test"
# Optimized by cpython for linear time
def concat1(times):
val = ""
for _ in range(0, times):
val += s
return val
# Not optimized by cpython
def concat2(times):
val = ""
for _ in range(0, times):
val2 = val + s
val = val2
return val
# ???
def concat3(times):
val = []
for _ in range(0, times):
val.append(s)
return ''.join(s)
if __name__ == "__main__":
main()