-
- data = """100m 14 Aug 2016
- Usain Bolt 21 AUG 1986
- Justin Gatlin 10 FEB 1982
- Andrew De Grasse 10 NOV 1994
- Yohan Blake 26 DEC 1989
- Akani Simbine 21 SEP 1993
- Ben Youssef Meite 11 NOV 1986
- Jimmy Vicaut 27 FEB 1992
- Trayvon Bromell 10 JUL 1995
-
- 200m 18 Aug 2016
- Usain Bolt 21 AUG 1986
- Andre De Grasse 10 NOV 1994
- Christophe Lemaitre 11 JUN 1990
- Adam Gemili 06 OCT 1993
- Churandy Martina 03 JUL 1984
- Lashawn Merritt 27 JUN 1986
- Alonso Edward 08 DEC 1989
- Ramil Guliyev 29 MAY 1990
-
- 400m 14 Aug 2016
- Wayde Van Niekerk 15 Jul 1992
- Kirani James 1 Sep 1992
- Lashawn Merritt 27 Jun 1986
- Machel Cedenio 6 Sep 1995
- Karabo Sibanda 2 Jul 1998
- Ali Khamis Khamis 30 Jun 1995
- Bralon Taplin 8 May 1992
- Matthew Hudson-Smith 26 Oct 1994
-
- 800m 15 Aug 2016
- David Lekuta Rudisha 17 Dec 1988
- TAOUFIK MAKHLOUFI 29 APR 1988
- CLAYTON MURPHY 26 FEB 1995
- PIERRE-AMBROISE BOSSE 11 MAY 1992
- FERGUSON CHERUIYOT ROTICH 30 NOV 1989
- MARCIN LEWANDOWSKI 13 JUN 1987
- ALFRED KIPKETER 28 DEC 1996
- BORIS BERIAN 19 DEC 1992
-
- 1500m 20 Aug 2016
- MATTHEW CENTROWITZ 18 OCT 1989
- TAOUFIK MAKHLOUFI 29 APR 1988
- NICHOLAS WILLIS 25 APR 1983
- AYANLEH SOULEIMAN 03 DEC 1992
- ABDALAATI IGUIDER 25 MAR 1987
- DAVID BUSTOS 25 AUG 1990
- BEN BLANKENSHIP 15 DEC 1988
- RYAN GREGSON 26 APR 1990
-
- 5000m 20 Aug 2016
- MOHAMED FARAH 23 MAR 1983
- PAUL KIPKEMOI CHELIMO 27 OCT 1990
- HAGOS GEBRHIWET 11 MAY 1994
- MOHAMMED AHMED 05 JAN 1991
- BERNARD LAGAT 12 DEC 1974
- ANDREW BUTCHART 14 OCT 1991
- ALBERT KIBICHII ROP 17 JUL 1992
- JOSHUA KIPRUI CHEPTEGEI 12 SEP 1996
-
- 10000m 13 Aug 2016
- MOHAMED FARAH 23 MAR 1983
- PAUL KIPNGETICH TANUI 22 DEC 1990
- TAMIRAT TOLA 11 AUG 1991
- YIGREM DEMELASH 26 JAN 1994
- GALEN RUPP 08 MAY 1986
- JOSHUA KIPRUI CHEPTEGEI 12 SEP 1996
- BEDAN KAROKI MUCHIRI 21 AUG 1990
- ZERSENAY TADESE 08 FEB 1982
-
- 42195m 21 Aug 2016
- Eliud Kipchoge 5 NOV 1984
- FEYISA LILESA 01 FEB 1990
- GALEN RUPP 08 MAY 1986
- GHIRMAY GHEBRESLASSIE 14 NOV 1995
- ALPHONCE FELIX SIMBU 14 FEB 1992
- JARED WARD 09 SEP 1988
- TADESSE ABRAHAM 12 AUG 1982
- MUNYO SOLOMON MUTAI 22 OCT 1992
- """
-
- data2 = """
- 100m 27 JUL 1996
- 30 DEC 1973
- 21 JUN 1972
- 12 DEC 1974
- 02 NOV 1975
- 31 AUG 1968
- 28 DEC 1973
- 11 AUG 1963
- 30 DEC 1970
-
- 200m 1 AUG 1996
- 02 OCT 1967
- 31 DEC 1965
- 30 MAR 1976
- 23 JAN 1970
- 24 FEB 1975
- 30 NOV 1962
- 16 APR 1967
- 01 NOV 1970
-
- 400m 29 JUL 1996
- 31 MAR 1966
- 15 NOV 1969
- 23 NOV 1972
- 16 NOV 1973
- 11 MAY 1970
- 03 MAR 1968
- 04 APR 1975
- 18 NOV 1971
-
- 800m 31 JUL 1996
- 16 SEP 1972
- 07 JUN 1972
- 27 MAY 1961
- 09 APR 1974
- 27 JUN 1969
- 06 OCT 1972
- 20 NOV 1968
-
- 1500m 3 AUG 1996
- 28 FEB 1970
- 16 FEB 1969
- 24 OCT 1970
- 20 JAN 1969
- 22 FEB 1964
- 28 DEC 1962
- 03 NOV 1972
- 24 APR 1976
-
- 5000m 3 AUG 1996
- 9 DEC 1973
- 26 JUN 1970
- 7 AUG 1969
- 9 FEB 1965
- 27 SEP 1971
- 18 AUG 1970
- 25 FEB 1968
- 15 APR 1968
-
- 10000m 29 JUL 1996
- 18 APR 1973
- 17 JUN 1969
- 16 JAN 1972
- 18 JUN 1966
- 12 DEC 1973
- 25 JUN 1969
- 29 JAN 1967
- 14 JUL 1964
-
- 42195m 4 AUG 1996
- 15 APR 1971
- 11 OCT 1970
- 19 DEC 1973
- 3 MAR 1963
- 6 JAN 1964
- 9 JAN 1968
- 26 SEP 1962
- 7 AUG 1962
- """
- # Sources: https://www.olympic.org/ and Wikipedia
- # (the official results site is missing some information on some finals and the race dates)
-
- from datetime import datetime
- from matplotlib import pyplot as plt
- import numpy as np
-
def conv(x):
    """Parse a ``day month year`` string such as ``'21 AUG 1986'``.

    ``x`` must match the ``strptime`` format ``'%d %b %Y'`` (day of month,
    abbreviated month name, four-digit year). Returns the corresponding
    ``datetime``; ``strptime`` raises ``ValueError`` for text that does not
    match the format.
    """
    date_format = '%d %b %Y'
    parsed = datetime.strptime(x, date_format)
    return parsed
-
def extract(data):
    """Parse one Games' results and report the mean age of the finalists.

    ``data`` is a multi-line string made of sections. Each section starts
    with a header line ``<distance>m <day> <month> <year>`` giving the race
    distance in metres and the race date, followed by one line per finalist
    that ends with the athlete's date of birth (``<day> <month> <year>``),
    optionally preceded by the athlete's name. Fields may be separated by
    tabs or spaces; blank and whitespace-only lines are ignored.

    Prints the mean age per race distance and the share of finalists born in
    an even year, then returns the tuple ``(dates, dist, runners, avg)``:

    * ``dates``   -- race date (``datetime``) of each section
    * ``dist``    -- race distance in metres (``int``) of each section
    * ``runners`` -- one list of birth dates (``datetime``) per section
    * ``avg``     -- mean age in years on race day, one value per section

    Raises ``AssertionError`` if a section does not list 7 or 8 finalists,
    ``IndexError`` if a finalist line appears before any header line, and
    whatever ``conv`` raises for a date it cannot parse.
    """
    dates = []
    dist = []
    runners = []

    for line in data.split("\n"):
        fields = line.split()
        if not fields:
            continue

        # The date is always the last three whitespace-separated tokens, so
        # the parser works whether the name/date separator is a tab or
        # spaces, and whether or not a name is present at all.
        when = conv(" ".join(fields[-3:]))

        # A header is "<distance>m" plus a three-token date: exactly four
        # tokens, the first starting with a digit. Date-only finalist lines
        # have three tokens; named finalists start with a letter.
        if len(fields) == 4 and fields[0][0].isdigit():
            dist.append(int(fields[0][:-1]))  # drop the trailing "m"
            dates.append(when)
            runners.append([])
        else:
            runners[-1].append(when)

    # Some sections of the bundled data list only seven finalists, so both
    # seven and eight are accepted.
    assert all(len(births) in [7, 8] for births in runners)

    # Mean age on race day, converting days to years with the mean tropical
    # year length.
    avg = []
    for race_day, births in zip(dates, runners):
        age_days = sum((race_day - born).days for born in births)
        avg.append(age_days / 365.2422 / len(births))

    # print(...) with a single pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    for metres, mean_age in zip(dist, avg):
        print("%6sm: %.1f" % (metres, mean_age))

    even = sum(1 for births in runners for born in births
               if born.year % 2 == 0)
    total = sum(len(births) for births in runners)
    # Guard against empty input, which would otherwise divide by zero.
    share = 100. * even / total if total else 0.0
    print("%.1f%% (%d out of %d) born in even year" % (share, even, total))
    return dates, dist, runners, avg
-
# Plot the mean age of finalists against race distance for both Games.
# Each series is drawn as points on a log-scaled x axis together with a
# dashed first-degree polynomial fit of mean age against log(distance).
for results, colour, games in ((data, 'r', 'Rio 2016'),
                               (data2, 'b', 'Atlanta 1996')):
    dates, dist, runners, avg = extract(results)
    log_dist = np.log(dist)
    trend = np.poly1d(np.polyfit(log_dist, avg, 1))
    plt.semilogx(dist, avg, colour + '.', mew=5, ms=10, label=games)
    plt.plot(dist, trend(log_dist), colour + '--')

# Hide the minor ticks on the x axis. Booleans are used because the string
# 'off' is truthy and is not treated as False by current Matplotlib.
plt.tick_params(axis='x', which='minor', bottom=False, top=False)

# Put the major ticks at the race distances of the last series parsed. A
# real list is passed because map() yields a one-shot iterator on Python 3.
plt.xticks(dist, [str(metres) for metres in dist])
plt.xlabel('Race distance (m)', fontsize=18)

# Widen the automatically chosen axis limits a little.
x_low, x_high = plt.xlim()
plt.xlim([x_low - 20, x_high])
y_low, y_high = plt.ylim()
plt.ylim([y_low - 0.2, y_high + 0.2])

plt.ylabel('Mean age of finalists (years)', fontsize=18)
plt.title('Average age of Olympic finalists with race distance', fontsize=20)
plt.legend(loc='upper left', numpoints=1)
plt.tight_layout()
plt.show()
-