spacepaste

  1.  
  2. data = """100m 14 Aug 2016
  3. Usain Bolt 21 AUG 1986
  4. Justin Gatlin 10 FEB 1982
  5. Andrew De Grasse 10 NOV 1994
  6. Yohan Blake 26 DEC 1989
  7. Akani Simbine 21 SEP 1993
  8. Ben Youssef Meite 11 NOV 1986
  9. Jimmy Vicaut 27 FEB 1992
  10. Trayvon Bromell 10 JUL 1995
  11. 200m 18 Aug 2016
  12. Usain Bolt 21 AUG 1986
  13. Andre De Grasse 10 NOV 1994
  14. Christophe Lemaitre 11 JUN 1990
  15. Adam Gemili 06 OCT 1993
  16. Churandy Martina 03 JUL 1984
  17. Lashawn Merritt 27 JUN 1986
  18. Alonso Edward 08 DEC 1989
  19. Ramil Guliyev 29 MAY 1990
  20. 400m 14 Aug 2016
  21. Wayde Van Niekerk 15 Jul 1992
  22. Kirani James 1 Sep 1992
  23. Lashawn Merritt 27 Jun 1986
  24. Machel Cedenio 6 Sep 1995
  25. Karabo Sibanda 2 Jul 1998
  26. Ali Khamis Khamis 30 Jun 1995
  27. Bralon Taplin 8 May 1992
  28. Matthew Hudson-Smith 26 Oct 1994
  29. 800m 15 Aug 2016
  30. David Lekuta Rudisha 17 Dec 1988
  31. TAOUFIK MAKHLOUFI 29 APR 1988
  32. CLAYTON MURPHY 26 FEB 1995
  33. PIERRE-AMBROISE BOSSE 11 MAY 1992
  34. FERGUSON CHERUIYOT ROTICH 30 NOV 1989
  35. MARCIN LEWANDOWSKI 13 JUN 1987
  36. ALFRED KIPKETER 28 DEC 1996
  37. BORIS BERIAN 19 DEC 1992
  38. 1500m 20 Aug 2016
  39. MATTHEW CENTROWITZ 18 OCT 1989
  40. TAOUFIK MAKHLOUFI 29 APR 1988
  41. NICHOLAS WILLIS 25 APR 1983
  42. AYANLEH SOULEIMAN 03 DEC 1992
  43. ABDALAATI IGUIDER 25 MAR 1987
  44. DAVID BUSTOS 25 AUG 1990
  45. BEN BLANKENSHIP 15 DEC 1988
  46. RYAN GREGSON 26 APR 1990
  47. 5000m 20 Aug 2016
  48. MOHAMED FARAH 23 MAR 1983
  49. PAUL KIPKEMOI CHELIMO 27 OCT 1990
  50. HAGOS GEBRHIWET 11 MAY 1994
  51. MOHAMMED AHMED 05 JAN 1991
  52. BERNARD LAGAT 12 DEC 1974
  53. ANDREW BUTCHART 14 OCT 1991
  54. ALBERT KIBICHII ROP 17 JUL 1992
  55. JOSHUA KIPRUI CHEPTEGEI 12 SEP 1996
  56. 10000m 13 Aug 2016
  57. MOHAMED FARAH 23 MAR 1983
  58. PAUL KIPNGETICH TANUI 22 DEC 1990
  59. TAMIRAT TOLA 11 AUG 1991
  60. YIGREM DEMELASH 26 JAN 1994
  61. GALEN RUPP 08 MAY 1986
  62. JOSHUA KIPRUI CHEPTEGEI 12 SEP 1996
  63. BEDAN KAROKI MUCHIRI 21 AUG 1990
  64. ZERSENAY TADESE 08 FEB 1982
  65. 42195m 21 Aug 2016
  66. Eliud Kipchoge 5 NOV 1984
  67. FEYISA LILESA 01 FEB 1990
  68. GALEN RUPP 08 MAY 1986
  69. GHIRMAY GHEBRESLASSIE 14 NOV 1995
  70. ALPHONCE FELIX SIMBU 14 FEB 1992
  71. JARED WARD 09 SEP 1988
  72. TADESSE ABRAHAM 12 AUG 1982
  73. MUNYO SOLOMON MUTAI 22 OCT 1992
  74. """
  75. data2 = """
  76. 100m 27 JUL 1996
  77. 30 DEC 1973
  78. 21 JUN 1972
  79. 12 DEC 1974
  80. 02 NOV 1975
  81. 31 AUG 1968
  82. 28 DEC 1973
  83. 11 AUG 1963
  84. 30 DEC 1970
  85. 200m 1 AUG 1996
  86. 02 OCT 1967
  87. 31 DEC 1965
  88. 30 MAR 1976
  89. 23 JAN 1970
  90. 24 FEB 1975
  91. 30 NOV 1962
  92. 16 APR 1967
  93. 01 NOV 1970
  94. 400m 29 JUL 1996
  95. 31 MAR 1966
  96. 15 NOV 1969
  97. 23 NOV 1972
  98. 16 NOV 1973
  99. 11 MAY 1970
  100. 03 MAR 1968
  101. 04 APR 1975
  102. 18 NOV 1971
  103. 800m 31 JUL 1996
  104. 16 SEP 1972
  105. 07 JUN 1972
  106. 27 MAY 1961
  107. 09 APR 1974
  108. 27 JUN 1969
  109. 06 OCT 1972
  110. 20 NOV 1968
  111. 1500m 3 AUG 1996
  112. 28 FEB 1970
  113. 16 FEB 1969
  114. 24 OCT 1970
  115. 20 JAN 1969
  116. 22 FEB 1964
  117. 28 DEC 1962
  118. 03 NOV 1972
  119. 24 APR 1976
  120. 5000m 3 AUG 1996
  121. 9 DEC 1973
  122. 26 JUN 1970
  123. 7 AUG 1969
  124. 9 FEB 1965
  125. 27 SEP 1971
  126. 18 AUG 1970
  127. 25 FEB 1968
  128. 15 APR 1968
  129. 10000m 29 JUL 1996
  130. 18 APR 1973
  131. 17 JUN 1969
  132. 16 JAN 1972
  133. 18 JUN 1966
  134. 12 DEC 1973
  135. 25 JUN 1969
  136. 29 JAN 1967
  137. 14 JUL 1964
  138. 42195m 4 AUG 1996
  139. 15 APR 1971
  140. 11 OCT 1970
  141. 19 DEC 1973
  142. 3 MAR 1963
  143. 6 JAN 1964
  144. 9 JAN 1968
  145. 26 SEP 1962
  146. 7 AUG 1962
  147. """
# Sources: https://www.olympic.org/ and Wikipedia
# (the official results site is missing some information on some finals and the race dates)
  150. from datetime import datetime
  151. from matplotlib import pyplot as plt
  152. import numpy as np
  153. def conv(x):
  154. return datetime.strptime(x, '%d %b %Y')
  155. def extract(data):
  156. lines = [x for x in data.split("\n") if x]
  157. dates = []
  158. dist = []
  159. runners = []
  160. for line in lines:
  161. if "\t" not in line: line = " \t" + line
  162. a, b = line.split("\t")
  163. if line[0].isdigit():
  164. dist.append(int(a[:-1]))
  165. dates.append(conv(b))
  166. runners.append([])
  167. else:
  168. runners[-1].append(conv(b))
  169. assert(all(len(x) in [7, 8] for x in runners))
  170. avg = [sum((dates[i] - y).days for y in runners[i]) / 365.2422 / len(runners[i]) for i in xrange(len(dist))]
  171. for i in xrange(len(dist)):
  172. print "%6sm: %.1f" % (dist[i], avg[i])
  173. even = sum(sum([x.year % 2 == 0 for x in y]) for y in runners)
  174. total = sum(len(x) for x in runners)
  175. print "%.1f%% (%d out of %d) born in even year" % (100.*even/total, even, total)
  176. return dates, dist, runners, avg
  177. dates, dist, runners, avg = extract(data)
  178. plt.semilogx(dist, avg, 'r.', mew=5, ms=10, label='Rio 2016')
  179. plt.plot(dist, np.poly1d(np.polyfit(np.log(dist), avg, 1))(np.log(dist)), 'r--')
  180. plt.tick_params(axis='x', which='minor', bottom='off', top='off')
  181. dates, dist, runners, avg = extract(data2)
  182. plt.semilogx(dist, avg, 'b.', mew=5, ms=10, label='Atlanta 1996')
  183. plt.plot(dist, np.poly1d(np.polyfit(np.log(dist), avg, 1))(np.log(dist)), 'b--')
  184. plt.xticks(dist, map(str, dist))
  185. plt.xlabel('Race distance (m)', fontsize=18)
  186. plt.xlim([plt.xlim()[0] - 20, plt.xlim()[1]])
  187. plt.ylim([plt.ylim()[0] - 0.2, plt.ylim()[1] + 0.2])
  188. plt.ylabel('Mean age of finalists (years)', fontsize=18)
  189. plt.title('Average age of Olympic finalists with race distance', fontsize=20)
  190. plt.legend(loc='upper left', numpoints=1)
  191. plt.tight_layout()
  192. plt.show()
  193.