%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

N = 50
m_true = 2
b_true = -1
dy = 2.0

np.random.seed(0)
xdata = 10 * np.random.random(N)
ydata = np.random.normal(b_true + m_true * xdata, dy)
plt.errorbar(xdata, ydata, dy, fmt='.k', ecolor='lightgray');

from scipy import optimize
optimize?
optimize.fmin?

def chi2(theta, x, y, dy):
    # theta = [b, m]
    return np.sum(((y - theta[0] - theta[1] * x) / dy) ** 2)

theta_guess = [0, 1]
theta_best = optimize.fmin(chi2, theta_guess, args=(xdata, ydata, dy))
print(theta_best)

xfit = np.linspace(0, 10)
yfit = theta_best[0] + theta_best[1] * xfit
plt.errorbar(xdata, ydata, dy, fmt='.k', ecolor='lightgray');
plt.plot(xfit, yfit, '-k');

optimize.leastsq?

def deviations(theta, x, y, dy):
    return (y - theta[0] - theta[1] * x) / dy

theta_best, ier = optimize.leastsq(deviations, theta_guess, args=(xdata, ydata, dy))
print(theta_best)

yfit = theta_best[0] + theta_best[1] * xfit
plt.errorbar(xdata, ydata, dy, fmt='.k', ecolor='lightgray');
plt.plot(xfit, yfit, '-k');

results = optimize.leastsq(deviations, theta_guess, args=(xdata, ydata, dy), full_output=True)
theta_best = results[0]
covariance = results[1]
print(covariance)
print("y = ({0:.2f} +/- {1:.2f})x + ({2:.2f} +/- {3:.2f})".format(
    theta_best[1], np.sqrt(covariance[1, 1]),
    theta_best[0], np.sqrt(covariance[0, 0])))

np.linalg.solve?
np.linalg.lstsq?

Xdata = np.vstack([np.ones_like(xdata), xdata]).T
theta_best, resid, rank, singvals = np.linalg.lstsq(Xdata, ydata)
print(theta_best)

yfit = theta_best[0] + theta_best[1] * xfit
plt.errorbar(xdata, ydata, dy, fmt='.k', ecolor='lightgray');
plt.plot(xfit, yfit, '-k');

# compute the least-squares solution by hand!
theta_best = np.linalg.solve(np.dot(Xdata.T, Xdata), np.dot(Xdata.T, ydata))
covariance = dy ** 2 * np.linalg.inv(np.dot(Xdata.T, Xdata))
print("y = ({0:.2f} +/- {1:.2f})x + ({2:.2f} +/- {3:.2f})".format(
    theta_best[1], np.sqrt(covariance[1, 1]),
    theta_best[0], np.sqrt(covariance[0, 0])))

def fit_polynomial(deg=2):
    p = np.polyfit(xdata, ydata, deg=deg)
    yfit = np.polyval(p, xfit)
    plt.errorbar(xdata, ydata, dy, fmt='.k', ecolor='lightgray')
    plt.plot(xfit, yfit)
    rms = np.sqrt(np.mean((ydata - np.polyval(p, xdata)) ** 2))
    print("rms error (deg={0}): {1:.2f}".format(deg, rms))

fit_polynomial(2)
fit_polynomial(10)

Now apply the same machinery to a real dataset: a variable-star light curve from the LINEAR survey, fetched with astroML.

# Uncomment and run this to install astroML
# !pip install astroML
# !pip install astroML_addons

%matplotlib inline
import astroML.datasets
import numpy as np
import matplotlib.pyplot as plt

lcs = astroML.datasets.fetch_LINEAR_sample()
print(lcs)

time, flux, dflux = lcs[18525697].T
plt.errorbar(time, flux, yerr=dflux, fmt="ro")
plt.gca().invert_yaxis()
plt.xlabel("Mean Julian Day")
plt.ylabel("Mag")
plt.show()

from astroML.time_series import lomb_scargle
periods = np.logspace(-1, 0, 10000, base=10)
periodogram = lomb_scargle(time, flux, dflux, omega=2 * np.pi / periods, generalized=True)

plt.plot(periods, periodogram)
plt.semilogx()
plt.xlabel("Period (days)")
plt.ylabel("Power")
plt.show()

idx = np.argmax(periodogram)
print(periods[idx], periodogram[idx])

period = periods[idx]
phase = time / period - time // period
print(min(phase), max(phase))

plt.errorbar(phase, flux, yerr=dflux, fmt="ro")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Mag")
plt.show()
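The class below wraps a truncated Fourier-series model of the folded light curve. For reference, writing n for nterms, the model that its evaluate() method computes is

$$ m(\phi) = c_0 + \sum_{k=1}^{n-1} a_k \cos\!\left(2\pi k \phi + \phi_k\right), \qquad \theta = (c_0,\, a_1,\, \phi_1,\, \ldots,\, a_{n-1},\, \phi_{n-1}), $$

i.e. a mean magnitude plus an amplitude and a phase offset per cosine term, for 2n - 1 free parameters in total.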
class Fourier(object):
    def __init__(self, phase, flux, dflux, nterms):
        self.phase = phase
        self.flux = flux
        self.dflux = dflux
        self.nterms = nterms
        assert(self.nterms > 0)

    def evaluate(self, phase, terms):
        assert(len(terms) == 2 * self.nterms - 1)
        model = terms[0] * np.ones(len(phase))
        for i in range(self.nterms - 1):
            model += terms[2*i + 1] * np.cos(2 * np.pi * (i+1) * phase + terms[2*i + 2])
        return model

    def chi2(self, args):
        model = self.evaluate(self.phase, args)
        chi = (model - self.flux) / self.dflux
        return np.sum(chi**2)

Create a (very basic) instance of this class, and plot it up over a range of phases using its evaluate() method.

fourier = Fourier(phase, flux, dflux, 1)
mphase = np.arange(0, 1, 0.01)
model = fourier.evaluate(mphase, [10])
plt.plot(mphase, model, "k-")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Model mag")
plt.show()

fourier = Fourier(phase, flux, dflux, 2)
mphase = np.arange(0, 1, 0.01)
model = fourier.evaluate(mphase, [10, 0.1, 0.0])
plt.plot(mphase, model, "k-")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Model mag")
plt.show()

fourier = Fourier(phase, flux, dflux, 2)
mphase = np.arange(0, 1, 0.01)
model = fourier.evaluate(mphase, [10, 0.1, np.pi])
plt.plot(mphase, model, "k-")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Model mag")
plt.show()

fourier = Fourier(phase, flux, dflux, 2)
guess = []  # one entry per parameter you want to fit for: here the mean magnitude, the amplitude, and the phase of minimum light
guess.append(np.mean(fourier.flux))
guess.append(np.std(fourier.flux))
print(np.argmax(fourier.flux))
print(fourier.flux[np.argmax(fourier.flux)])
print(fourier.phase[np.argmax(fourier.flux)])
guess.append(2 * np.pi * (1 - fourier.phase[np.argmax(fourier.flux)]))
print(guess)

optimize.fmin(fourier.chi2, x0=[guess], disp=0, full_output=1)

result = optimize.fmin(fourier.chi2, x0=[guess], disp=0, full_output=1)
best_fit = result[0]
print(best_fit)

plt.errorbar(fourier.phase, fourier.flux, yerr=fourier.dflux, fmt="ro")
plt.plot(mphase, fourier.evaluate(mphase, guess), "b--", label="Initial guess")
plt.plot(mphase, fourier.evaluate(mphase, best_fit), "g-", label="Best fit")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Mag")
plt.legend()
plt.show()

chi2_1 = result[1]
chi2_2 = fourier.chi2(best_fit)
print(chi2_1, chi2_2, chi2_1 / (len(flux) - len(best_fit) - 1))

This is where we start to go down the rabbit hole. Start to add more terms...
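Before doing that, here is a quick sanity check (a sketch, not part of the original notebook; dof and pvalue are just illustrative names): if the current model were adequate and the uncertainties Gaussian, the chi-squared above should be drawn from a chi-square distribution with roughly N - k degrees of freedom, so a vanishingly small survival probability says the model is too simple.

from scipy import stats
dof = len(fourier.flux) - len(best_fit)   # N data points minus k fitted parameters
pvalue = stats.chi2.sf(chi2_2, dof)       # P(chi2 >= observed | model is adequate)
print(dof, chi2_2 / dof, pvalue)          # a reduced chi2 well above 1 (tiny p-value) argues for more terms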
fourier2 = Fourier(phase, flux, dflux, 3)
guess2 = []  # one entry per parameter you want to fit for, now 2*3 - 1 = 5
guess2.append(np.mean(fourier2.flux))
guess2.append(np.std(fourier2.flux))
guess2.append(2 * np.pi * (1 - fourier2.phase[np.argsort(fourier2.flux)[-1]]))
guess2.append(0.0)
guess2.append(2 * np.pi * (1 - fourier2.phase[np.argsort(fourier2.flux)[-1]]))

result2 = optimize.fmin_bfgs(fourier2.chi2, x0=[guess2], disp=0, full_output=1)
best_fit2 = result2[0]
print(best_fit2)

plt.errorbar(fourier2.phase, fourier2.flux, yerr=fourier2.dflux, fmt="ro")
plt.plot(mphase, fourier2.evaluate(mphase, guess2), "b--", label="Initial guess")
plt.plot(mphase, fourier2.evaluate(mphase, best_fit2), "g-", label="Best fit")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Mag")
plt.show()

fourier5 = Fourier(phase, flux, dflux, 5)
guess5 = []  # one entry per parameter you want to fit for, now 2*5 - 1 = 9
guess5.append(np.mean(fourier5.flux))
guess5.append(np.std(fourier5.flux))
guess5.append(2 * np.pi * (1 - fourier5.phase[np.argsort(fourier5.flux)[-1]]))
for i in range(2, 5):
    guess5.append(0.0)
    guess5.append(2 * np.pi * (1 - fourier5.phase[np.argsort(fourier5.flux)[-1]]))

result5 = optimize.fmin_bfgs(fourier5.chi2, x0=[guess5], disp=0, full_output=1)
best_fit5 = result5[0]
print(best_fit5)

plt.errorbar(fourier5.phase, fourier5.flux, yerr=fourier5.dflux, fmt="ro")
plt.plot(mphase, fourier5.evaluate(mphase, guess5), "b--", label="Initial guess")
plt.plot(mphase, fourier5.evaluate(mphase, best_fit5), "g-", label="Best fit")
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Mag")
plt.show()

class Fourier2(Fourier):
    def __init__(self, phase, flux, dflux, nterms):
        Fourier.__init__(self, phase, flux, dflux, nterms)

    def makeGuess(self):
        guess = []
        guess.append(np.mean(self.flux))
        if self.nterms > 1:
            guess.append(np.std(self.flux))
            guess.append(2 * np.pi * (1 - self.phase[np.argsort(self.flux)[-1]]))
            for i in range(2, self.nterms):
                guess.append(0.0)
                guess.append(2 * np.pi * (1 - self.phase[np.argsort(self.flux)[-1]]))
        return guess

nterms = np.arange(1, 10)
chi2 = []
for nterm in nterms:
    fourier = Fourier2(phase, flux, dflux, nterm)
    guess = fourier.makeGuess()
    print(guess)
    chi2.append(optimize.fmin_bfgs(fourier.chi2, x0=[guess], disp=0, full_output=1)[1])
chi2 = np.array(chi2)

plt.plot(nterms, chi2)
plt.xlabel("N Terms")
plt.ylabel("Chi2")
plt.show()

delta_chi2 = chi2[:-1] - chi2[1:]
plt.plot(nterms[1:], delta_chi2)
plt.xlabel("N terms")
plt.ylabel("Delta chi2")
plt.show()
# You will need to figure out the significance of all these delta_chi2 as part of your homework!
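Another standard way to decide how many terms the data justify is to penalize the chi-squared by the number of free parameters. This is exactly what the BICpenalty and AICpenalty arrays below compute (up to additive constants that do not depend on the model): for Gaussian uncertainties, the Bayesian and Akaike information criteria reduce to

$$ \mathrm{BIC} = \chi^2 + k \ln N, \qquad \mathrm{AIC} = \chi^2 + 2k, $$

where k is the number of fitted parameters and N is the number of data points, and the preferred model is the one that minimizes the criterion.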
nterms = np.arange(1, 10)
chi2 = []
BICpenalty = []
AICpenalty = []
for nterm in nterms:
    fourier = Fourier2(phase, flux, dflux, nterm)
    guess = fourier.makeGuess()
    BICpenalty.append(len(guess) * np.log(len(fourier.phase)))
    AICpenalty.append(len(guess) * 2.0)
    chi2.append(optimize.fmin_bfgs(fourier.chi2, x0=[guess], disp=0, full_output=1)[1])
chi2 = np.array(chi2)
BICpenalty = np.array(BICpenalty)
AICpenalty = np.array(AICpenalty)
print(chi2)
print(BICpenalty)
print(AICpenalty)

plt.plot(nterms, chi2, label="chi2")
plt.plot(nterms, chi2 + BICpenalty, label="BIC")
plt.plot(nterms, chi2 + AICpenalty, label="AIC")
plt.xlabel("N Terms")
plt.ylabel("Chi2")
plt.semilogy()
plt.legend()
plt.show()

nterms = np.arange(1, 30, 2)
chi2 = []
BICpenalty = []
AICpenalty = []
for nterm in nterms:
    fourier = Fourier2(phase, flux, dflux, nterm)
    guess = fourier.makeGuess()
    BICpenalty.append(len(guess) * np.log(len(fourier.phase)))
    AICpenalty.append(len(guess) * 2.0)
    chi2.append(optimize.fmin_bfgs(fourier.chi2, x0=[guess], disp=0, full_output=1)[1])
chi2 = np.array(chi2)
BICpenalty = np.array(BICpenalty)
AICpenalty = np.array(AICpenalty)

plt.plot(nterms, chi2, label="chi2")
plt.plot(nterms, chi2 + BICpenalty, label="BIC")
plt.plot(nterms, chi2 + AICpenalty, label="AIC")
plt.xlabel("N Terms")
plt.ylabel("Chi2")
plt.semilogy()
plt.legend()
plt.show()

print(np.argmin(chi2 + BICpenalty), np.argmin(chi2 + AICpenalty))
# number of Fourier terms preferred by BIC and AIC, respectively
print(nterms[np.argmin(chi2 + BICpenalty)], nterms[np.argmin(chi2 + AICpenalty)])
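As a final check (a sketch built only from the classes defined above; the names nbest, fourier_best, guess_best, and result_best are just illustrative), the BIC-preferred model can be refit and overplotted on the folded light curve:

nbest = nterms[np.argmin(chi2 + BICpenalty)]
fourier_best = Fourier2(phase, flux, dflux, nbest)
guess_best = fourier_best.makeGuess()
result_best = optimize.fmin_bfgs(fourier_best.chi2, x0=[guess_best], disp=0, full_output=1)

plt.errorbar(phase, flux, yerr=dflux, fmt="ro")
plt.plot(mphase, fourier_best.evaluate(mphase, result_best[0]), "g-",
         label="BIC-preferred model ({0} terms)".format(nbest))
plt.gca().invert_yaxis()
plt.xlabel("Phase")
plt.ylabel("Mag")
plt.legend()
plt.show()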