Date

I spent a little bit of time looking into Python string concatenation times. I learned that newer versions of Python implement an optimzation for the plus equals operator. In the future I'd like to know what this optimization is. It seemed simple enough to escape by introducing an intermediate variable. Anyway, here is the code:

Method 1 - Optimized in CPython

def concat1(times):
    val = ""
    for _ in range(0, times):
        val += s
    return val

Method 2 - Not optimized in CPython

def concat2(times):
    val = ""
    for _ in range(0, times):
        val2 = val + s
        val = val2
    return val

Method 3

def concat3(times):
    val = []
    for _ in range(0, times):
        val.append(s)
    return ''.join(s)

I ran these functions with timeit and produced this graph:

Here is all the code:

# !/usr/bin/env python
# -*- coding: utf-8 -*-

# The purpose of this file is to measure the time it takes to concatenate strings
# rather, the big-O runtime of different methods

from timeit import Timer

def main():
    """
    The main function
    """
    # If any observation exceeds this value then terminate the experiment
    observation_limit = 2
    step = 1000
    data_points = 100
    fns = [concat1, concat2, concat3]
    for i in range(0, data_points * step, step):
        times = [Timer(lambda: fn(i)).timeit(number=5) for fn in fns]
        time_strings = ",".join([str(t) for t in times])
        print("{},{}".format(i, time_strings))
        if any([t > observation_limit for t in times]):
            break

s = "test"

# Optimized by cpython for linear time
def concat1(times):
    val = ""
    for _ in range(0, times):
        val += s
    return val

# Not optimized by cpython
def concat2(times):
    val = ""
    for _ in range(0, times):
        val2 = val + s
        val = val2
    return val

# ???
def concat3(times):
    val = []
    for _ in range(0, times):
        val.append(s)
    return ''.join(s)

if __name__ == "__main__":
    main()