"""Plot the mean age of Olympic finalists against race distance.

Two hard-coded result tables (Rio 2016 and Atlanta 1996) are parsed, the
per-race mean age is printed, and both series are drawn on a log-x plot with a
straight-line fit in log(distance).
"""

from datetime import datetime

import numpy as np

# Table format, as consumed by extract():
#   * a race header row is "<metres>m<TAB><date of the final>";
#   * every following row belongs to that race and is either
#     "<name><TAB><date of birth>" or just "<date of birth>".
# The TAB separators are written as explicit \t escapes so they cannot be lost
# to editors or whitespace normalisation.
data = """100m\t14 Aug 2016
Usain Bolt\t21 AUG 1986
Justin Gatlin\t10 FEB 1982
Andrew De Grasse\t10 NOV 1994
Yohan Blake\t26 DEC 1989
Akani Simbine\t21 SEP 1993
Ben Youssef Meite\t11 NOV 1986
Jimmy Vicaut\t27 FEB 1992
Trayvon Bromell\t10 JUL 1995
200m\t18 Aug 2016
Usain Bolt\t21 AUG 1986
Andre De Grasse\t10 NOV 1994
Christophe Lemaitre\t11 JUN 1990
Adam Gemili\t06 OCT 1993
Churandy Martina\t03 JUL 1984
Lashawn Merritt\t27 JUN 1986
Alonso Edward\t08 DEC 1989
Ramil Guliyev\t29 MAY 1990
400m\t14 Aug 2016
Wayde Van Niekerk\t15 Jul 1992
Kirani James\t1 Sep 1992
Lashawn Merritt\t27 Jun 1986
Machel Cedenio\t6 Sep 1995
Karabo Sibanda\t2 Jul 1998
Ali Khamis Khamis\t30 Jun 1995
Bralon Taplin\t8 May 1992
Matthew Hudson-Smith\t26 Oct 1994
800m\t15 Aug 2016
David Lekuta Rudisha\t17 Dec 1988
TAOUFIK MAKHLOUFI\t29 APR 1988
CLAYTON MURPHY\t26 FEB 1995
PIERRE-AMBROISE BOSSE\t11 MAY 1992
FERGUSON CHERUIYOT ROTICH\t30 NOV 1989
MARCIN LEWANDOWSKI\t13 JUN 1987
ALFRED KIPKETER\t28 DEC 1996
BORIS BERIAN\t19 DEC 1992
1500m\t20 Aug 2016
MATTHEW CENTROWITZ\t18 OCT 1989
TAOUFIK MAKHLOUFI\t29 APR 1988
NICHOLAS WILLIS\t25 APR 1983
AYANLEH SOULEIMAN\t03 DEC 1992
ABDALAATI IGUIDER\t25 MAR 1987
DAVID BUSTOS\t25 AUG 1990
BEN BLANKENSHIP\t15 DEC 1988
RYAN GREGSON\t26 APR 1990
5000m\t20 Aug 2016
MOHAMED FARAH\t23 MAR 1983
PAUL KIPKEMOI CHELIMO\t27 OCT 1990
HAGOS GEBRHIWET\t11 MAY 1994
MOHAMMED AHMED\t05 JAN 1991
BERNARD LAGAT\t12 DEC 1974
ANDREW BUTCHART\t14 OCT 1991
ALBERT KIBICHII ROP\t17 JUL 1992
JOSHUA KIPRUI CHEPTEGEI\t12 SEP 1996
10000m\t13 Aug 2016
MOHAMED FARAH\t23 MAR 1983
PAUL KIPNGETICH TANUI\t22 DEC 1990
TAMIRAT TOLA\t11 AUG 1991
YIGREM DEMELASH\t26 JAN 1994
GALEN RUPP\t08 MAY 1986
JOSHUA KIPRUI CHEPTEGEI\t12 SEP 1996
BEDAN KAROKI MUCHIRI\t21 AUG 1990
ZERSENAY TADESE\t08 FEB 1982
42195m\t21 Aug 2016
Eliud Kipchoge\t5 NOV 1984
FEYISA LILESA\t01 FEB 1990
GALEN RUPP\t08 MAY 1986
GHIRMAY GHEBRESLASSIE\t14 NOV 1995
ALPHONCE FELIX SIMBU\t14 FEB 1992
JARED WARD\t09 SEP 1988
TADESSE ABRAHAM\t12 AUG 1982
MUNYO SOLOMON MUTAI\t22 OCT 1992
"""

# Same layout, but the finalist rows carry only a date (no name, no TAB).
data2 = """
100m\t27 JUL 1996
30 DEC 1973
21 JUN 1972
12 DEC 1974
02 NOV 1975
31 AUG 1968
28 DEC 1973
11 AUG 1963
30 DEC 1970
200m\t1 AUG 1996
02 OCT 1967
31 DEC 1965
30 MAR 1976
23 JAN 1970
24 FEB 1975
30 NOV 1962
16 APR 1967
01 NOV 1970
400m\t29 JUL 1996
31 MAR 1966
15 NOV 1969
23 NOV 1972
16 NOV 1973
11 MAY 1970
03 MAR 1968
04 APR 1975
18 NOV 1971
800m\t31 JUL 1996
16 SEP 1972
07 JUN 1972
27 MAY 1961
09 APR 1974
27 JUN 1969
06 OCT 1972
20 NOV 1968
1500m\t3 AUG 1996
28 FEB 1970
16 FEB 1969
24 OCT 1970
20 JAN 1969
22 FEB 1964
28 DEC 1962
03 NOV 1972
24 APR 1976
5000m\t3 AUG 1996
9 DEC 1973
26 JUN 1970
7 AUG 1969
9 FEB 1965
27 SEP 1971
18 AUG 1970
25 FEB 1968
15 APR 1968
10000m\t29 JUL 1996
18 APR 1973
17 JUN 1969
16 JAN 1972
18 JUN 1966
12 DEC 1973
25 JUN 1969
29 JAN 1967
14 JUL 1964
42195m\t4 AUG 1996
15 APR 1971
11 OCT 1970
19 DEC 1973
3 MAR 1963
6 JAN 1964
9 JAN 1968
26 SEP 1962
7 AUG 1962
"""

# Sources: https://www.olympic.org/ and Wikipedia
# (the official results site is missing some information on some finals and the race dates)


def conv(x):
    """Parse a 'day month-abbreviation year' string such as '14 Aug 2016'.

    Raises ValueError (from ``datetime.strptime``) if *x* does not match.
    """
    return datetime.strptime(x, '%d %b %Y')


def extract(data):
    """Parse a results table and print/return per-race age statistics.

    Args:
        data: newline-separated text. A row whose first character is a digit
            and which contains a TAB is a race header
            ``"<metres>m<TAB><race date>"``; any other row is a finalist of
            the most recent header, given as ``"<name><TAB><date>"`` or as a
            bare ``"<date>"``. Blank rows are ignored.

    Returns:
        A tuple ``(dates, dist, runners, avg)`` of parallel lists, one entry
        per race: the race date, the distance in metres, the list of finalist
        dates, and the mean age in years (day difference / 365.2422).

    Raises:
        AssertionError: if any race does not list 7 or 8 finalists.
        ValueError: if a date cannot be parsed or a row has more than one TAB.
        IndexError: if a finalist row appears before any race header.

    Side effects:
        Prints one line per race with its mean age, then the share of
        finalists whose year is even.
    """
    dates = []
    dist = []
    runners = []
    for line in data.split("\n"):
        if not line:
            continue
        if "\t" not in line:
            # Bare date row. The leading space keeps line[0] from being a
            # digit, so the row is not mistaken for a race header below.
            line = " \t" + line
        a, b = line.split("\t")
        if line[0].isdigit():
            dist.append(int(a[:-1]))  # drop the trailing 'm'
            dates.append(conv(b))
            runners.append([])
        else:
            runners[-1].append(conv(b))

    # Sanity check on the hand-entered tables (also rules out empty races,
    # which would otherwise divide by zero below).
    assert all(len(field) in (7, 8) for field in runners), \
        "every race must list 7 or 8 finalists"

    avg = [
        sum((race_day - born).days for born in field) / 365.2422 / len(field)
        for race_day, field in zip(dates, runners)
    ]
    for metres, mean_age in zip(dist, avg):
        print("%6sm: %.1f" % (metres, mean_age))

    even = sum(born.year % 2 == 0 for field in runners for born in field)
    total = sum(len(field) for field in runners)
    # Empty input has no finalists; report 0% instead of dividing by zero.
    share = 100. * even / total if total else 0.0
    print("%.1f%% (%d out of %d) born in even year" % (share, even, total))
    return dates, dist, runners, avg


def _plot_finals(plt, dist, avg, colour, label):
    """Draw one table's mean ages as dots plus a linear fit in log(distance)."""
    plt.semilogx(dist, avg, colour + '.', mew=5, ms=10, label=label)
    log_dist = np.log(dist)
    trend = np.poly1d(np.polyfit(log_dist, avg, 1))
    plt.plot(dist, trend(log_dist), colour + '--')


def main():
    """Print the statistics for both tables and show the comparison plot."""
    # Imported here so extract() can be used without a plotting backend.
    from matplotlib import pyplot as plt

    dates, dist, runners, avg = extract(data)
    _plot_finals(plt, dist, avg, 'r', 'Rio 2016')
    # Booleans, not the strings 'off': a non-empty string is truthy, so
    # current matplotlib would keep the minor ticks visible.
    plt.tick_params(axis='x', which='minor', bottom=False, top=False)

    dates, dist, runners, avg = extract(data2)
    _plot_finals(plt, dist, avg, 'b', 'Atlanta 1996')

    # Label the x axis with the actual race distances (a real list, since
    # map() is a one-shot iterator on Python 3).
    plt.xticks(dist, [str(metres) for metres in dist])
    plt.xlabel('Race distance (m)', fontsize=18)
    plt.xlim([plt.xlim()[0] - 20, plt.xlim()[1]])
    plt.ylim([plt.ylim()[0] - 0.2, plt.ylim()[1] + 0.2])
    plt.ylabel('Mean age of finalists (years)', fontsize=18)
    plt.title('Average age of Olympic finalists with race distance', fontsize=20)
    plt.legend(loc='upper left', numpoints=1)
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()