# test_fit_shapiro.py

# https://www.youtube.com/watch?v=yJCSupnOv8w
import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import shapiro
from vedo import settings
from vedo.pyplot import histogram, plot
  7. settings.default_font = "ComicMono"
  8. settings.use_parallel_projection = True
  9. settings.remember_last_figure_format = True
  10. data = [
  11. 196,
  12. 193,
  13. 186,
  14. 154,
  15. 151,
  16. 147,
  17. 141,
  18. 138,
  19. 125,
  20. 110,
  21. 109,
  22. 80,
  23. 67,
  24. 32,
  25. 12,
  26. -103,
  27. -108,
  28. -143,
  29. ]
  30. # Perform the Shapiro-Wilk test to check for normality
  31. statistic, p_value = shapiro(data)
  32. fig = histogram(
  33. data,
  34. title=(
  35. "Shapiro-Wilk test\n"
  36. "on cheating chess players\n"
  37. f"(p-value = {p_value*100:.3f}%)"
  38. ),
  39. xtitle="ELO score variation",
  40. gap=0.02,
  41. label="Data",
  42. xlim=(-300, 300),
  43. )
  44. # Fit the data with a double gaussian
  45. def func(x, a0, sigma0, a1, mean1, sigma1):
  46. g0 = a0 * np.exp(-(x )**2 /2 /sigma0**2) # background
  47. g1 = a1 * np.exp(-(x - mean1)**2 /2 /sigma1**2) # signal
  48. return g0 + g1
  49. xdata = fig.centers
  50. ydata = fig.frequencies
  51. fit_params, pcov = curve_fit(func, xdata, ydata, p0=[2,100,2,150,50])
  52. ydata_fit = func(xdata, *fit_params)
  53. ydata_fit_background = func(xdata, fit_params[0], fit_params[1], 0, 0, 1)
  54. fig += plot(xdata, ydata_fit, "-r 0", lw=4, label="Fit")
  55. fig += plot(xdata, ydata_fit_background, "-b", lw=2, label="Bkg")
  56. fig.add_legend()
  57. print("# of cheaters:", np.sum(ydata_fit - ydata_fit_background))
  58. fig.show(zoom="tight")