#!/usr/bin/env python
# coding: utf-8

# # Solution-05
# 
# This notebook contains the solution to the Bayesian model of 47 Ursae Majoris.
# 
# Your task is to find the orbital parameters of the first planet in this system, using data from Table 1 of [Fischer et al. 2002](http://iopscience.iop.org/article/10.1086/324336/meta).
# 
# The following IPython magic command will create the data file ``47UrsaeMajoris.txt`` in the current directory:

# In[ ]:

get_ipython().run_cell_magic('file', '47UrsaeMajoris.txt', 'date rv rv_err\n6959.737 -60.48 14.00\n7194.912 -53.60 7.49\n7223.798 -38.36 6.14\n7964.893 0.60 8.19\n8017.730 -28.29 10.57\n8374.771 -40.25 9.37\n8647.897 42.37 11.41\n8648.910 32.64 11.02\n8670.878 55.45 11.45\n8745.691 51.78 8.76\n8992.061 4.49 11.21\n9067.771 -14.63 7.00\n9096.734 -26.06 6.79\n9122.691 -47.38 7.91\n9172.686 -38.22 10.55\n9349.912 -52.21 9.52\n9374.964 -48.69 8.67\n9411.839 -36.01 12.81\n9481.720 -52.46 13.40\n9767.918 38.58 5.48\n9768.908 36.68 5.02\n9802.789 37.93 3.85\n10058.079 15.82 3.45\n10068.980 15.46 4.63\n10072.012 21.20 4.09\n10088.994 1.30 4.25\n10089.947 6.12 3.70\n10091.900 0.00 4.16\n10120.918 4.07 4.16\n10124.905 0.29 3.74\n10125.823 -1.87 3.79\n10127.898 -0.68 4.10\n10144.877 -4.13 5.26\n10150.797 -8.14 4.18\n10172.829 -10.79 4.43\n10173.762 -9.33 5.43\n10181.742 -23.87 3.28\n10187.740 -16.70 4.67\n10199.730 -16.29 3.98\n10203.733 -21.84 4.92\n10214.731 -24.51 3.67\n10422.018 -56.63 4.23\n10438.001 -39.61 3.91\n10442.027 -44.62 4.05\n10502.853 -32.05 4.69\n10504.859 -39.08 4.65\n10536.845 -22.46 5.18\n10537.842 -22.83 4.16\n10563.673 -17.47 4.03\n10579.697 -11.01 3.84\n10610.719 -8.67 3.52\n10793.957 37.00 3.78\n10795.039 41.85 4.80\n10978.684 36.42 5.01\n11131.066 13.56 6.61\n11175.027 -3.74 8.17\n11242.842 -21.85 5.43\n11303.712 -48.75 4.63\n11508.070 -51.65 8.37\n11536.064 -72.44 4.73\n11540.999 -57.58 5.97\n11607.916 -43.94 4.94\n11626.771 -39.14 7.03\n11627.754 -50.88 6.21\n11628.727 -51.52 5.87\n11629.832 -51.86 4.60\n11700.693 -24.58 5.20\n11861.049 14.64 5.33\n11874.068 14.15 5.75\n11881.045 18.02 4.15\n11895.068 16.96 4.60\n11906.014 11.73 4.07\n11907.011 22.83 4.38\n11909.042 23.42 3.78\n11910.955 18.34 4.33\n11914.067 15.45 5.37\n11915.048 24.05 3.82\n11916.033 23.16 3.67\n11939.969 27.53 5.08\n11946.960 21.44 4.18\n11969.902 30.99 4.58\n11971.894 38.36 5.01\n11998.779 33.82 3.93\n11999.820 27.52 3.98\n12000.858 23.40 4.07\n12028.740 37.08 4.95\n12033.746 26.28 5.24\n12040.759 31.12 3.54\n12041.719 34.04 3.45\n12042.695 31.38 3.98\n12073.723 21.81 4.73\n')
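# The check below is an addition to this writeup, not part of the original exercise: it
# simply confirms that the ``%%file`` magic wrote ``47UrsaeMajoris.txt`` to disk by
# printing its first few lines.

# In[ ]:

with open('47UrsaeMajoris.txt') as f:
    for line in f.readlines()[:4]:
        print(line.rstrip())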
# But first, we'll copy some relevant material from the first notebook.

# ## Preliminaries

# In[ ]:

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn; seaborn.set()  # nice plot formatting


# ## The Radial Velocity Model

# In[ ]:

from collections import namedtuple
params = namedtuple('params', ['T', 'e', 'K', 'V', 'omega', 'chi', 's'])


# In[ ]:

from scipy import optimize

@np.vectorize
def compute_E(M, e):
    """Solve Kepler's equation for the eccentric anomaly E given the mean anomaly M"""
    f = lambda E, M=M, e=e: E - e * np.sin(E) - M
    return optimize.brentq(f, 0, 2 * np.pi)

def radial_velocity(t, theta):
    """Compute radial velocity given orbital parameters"""
    T, e, K, V, omega, chi = theta[:6]

    # compute mean anomaly (0 <= M < 2pi)
    M = 2 * np.pi * ((t / T + chi) % 1)

    # solve for eccentric anomaly
    E = compute_E(M, e)

    # compute true anomaly
    f = 2 * np.arctan2(np.sqrt(1 + e) * np.sin(E / 2),
                       np.sqrt(1 - e) * np.cos(E / 2))

    # compute radial velocity
    return V - K * (np.sin(f + omega) + e * np.sin(omega))


# ## The Prior, Likelihood, and Posterior

# In[ ]:

theta_lim = params(T=(0.2, 2000), e=(0, 1), K=(0.01, 2000), V=(-2000, 2000),
                   omega=(0, 2 * np.pi), chi=(0, 1), s=(0.001, 100))
theta_min, theta_max = map(np.array, zip(*theta_lim))

def log_prior(theta):
    if np.any(theta < theta_min) or np.any(theta > theta_max):
        return -np.inf  # log(0)
    # Jeffreys prior on T, K, and s
    return -np.sum(np.log(theta[[0, 2, 6]]))

def log_likelihood(theta, t, rv, rv_err):
    sq_err = rv_err ** 2 + theta[6] ** 2
    rv_model = radial_velocity(t, theta)
    return -0.5 * np.sum(np.log(sq_err) + (rv - rv_model) ** 2 / sq_err)

def log_posterior(theta, t, rv, rv_err):
    ln_prior = log_prior(theta)
    if np.isinf(ln_prior):
        return ln_prior
    else:
        return ln_prior + log_likelihood(theta, t, rv, rv_err)

def make_starting_guess(t, rv, rv_err):
    # LombScargleFast is imported from gatspy further below, before this function is called
    model = LombScargleFast()
    model.optimizer.set(period_range=theta_lim.T, quiet=True)
    model.fit(t, rv, rv_err)

    rv_range = 0.5 * (np.max(rv) - np.min(rv))
    rv_center = np.mean(rv)

    return params(T=model.best_period, e=0.1, K=rv_range, V=rv_center,
                  omega=np.pi, chi=0.5, s=rv_err.mean())
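# Before touching the data, here is an optional sanity check (added for this writeup;
# the parameter values below are arbitrary trial values, not fit results): the Kepler
# solver should invert E - e*sin(E) = M, and the prior should be finite inside the
# bounds and -inf outside them.

# In[ ]:

# arbitrary trial parameter vector in the order (T, e, K, V, omega, chi, s)
theta_trial = np.array([1000.0, 0.1, 50.0, 0.0, np.pi, 0.5, 5.0])

# the Kepler solver should satisfy E - e*sin(E) = M
M_test = np.linspace(0.1, 2 * np.pi - 0.1, 5)
E_test = compute_E(M_test, 0.3)
print(np.allclose(E_test - 0.3 * np.sin(E_test), M_test))

# the log-prior is finite in bounds, and -inf when a parameter (here s) is out of bounds
print(log_prior(theta_trial))
print(log_prior(np.append(theta_trial[:-1], 1e6)))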
# ## The Data

# After running the above command, your data will be in a file called ``47UrsaeMajoris.txt``.
# 
# An easy way to load data in this form is the [pandas](http://pandas.pydata.org) package, which implements a DataFrame object (basically, a labeled data table).
# Reading the whitespace-delimited file is a one-line operation:

# In[ ]:

import pandas as pd
data = pd.read_csv('47UrsaeMajoris.txt', delim_whitespace=True)
t, rv, rv_err = data.values.T


# ### Visualize the Data

# In[ ]:

# Start by visualizing the data
plt.errorbar(t, rv, rv_err, fmt='.');


# ### Compute the Periodogram

# In[ ]:

# Compute the periodogram to look for significant periodicity
from gatspy.periodic import LombScargleFast
model = LombScargleFast()
model.fit(t, rv, rv_err)
periods, power = model.periodogram_auto()

plt.semilogx(periods, power)
plt.xlim(periods.min(), 10000);  # the lower limit must be positive on a log axis


# ### Initialize and run the MCMC Chain

# In[ ]:

theta_guess = make_starting_guess(t, rv, rv_err)
theta_guess


# In[ ]:

import emcee

ndim = len(theta_guess)  # number of parameters in the model
nwalkers = 50  # number of MCMC walkers

# start with a tight distribution of theta around the initial guess
rng = np.random.RandomState(42)
starting_guesses = theta_guess * (1 + 0.1 * rng.randn(nwalkers, ndim))

# create the sampler; fix the random state for replicability
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(t, rv, rv_err))
sampler.random_state = rng

# time and run the MCMC
get_ipython().run_line_magic('time', 'pos, prob, state = sampler.run_mcmc(starting_guesses, 1000)')


# ### Plot the chains: have they stabilized?

# In[ ]:

def plot_chains(sampler):
    fig, ax = plt.subplots(7, figsize=(8, 10), sharex=True)
    for i in range(7):
        ax[i].plot(sampler.chain[:, :, i].T, '-k', alpha=0.2)
        ax[i].set_ylabel(params._fields[i])

plot_chains(sampler)


# In[ ]:

# discard the burn-in and continue the chain from the current walker positions
sampler.reset()
get_ipython().run_line_magic('time', 'pos, prob, state = sampler.run_mcmc(pos, 1000)')


# In[ ]:

plot_chains(sampler)


# ### Make a Corner Plot for the Parameters

# In[ ]:

import corner
corner.corner(sampler.flatchain[:, :2], labels=params._fields[:2]);


# ### Plot the Model Fits over the Data

# In[ ]:

t_fit = np.linspace(t.min(), t.max(), 1000)
rv_fit = [radial_velocity(t_fit, sampler.flatchain[i])
          for i in rng.choice(sampler.flatchain.shape[0], 200)]

plt.figure(figsize=(14, 6))
plt.errorbar(t, rv, rv_err, fmt='.k')
plt.plot(t_fit, np.transpose(rv_fit), '-k', alpha=0.01)
plt.xlabel('time (days)')
plt.ylabel('radial velocity (m/s)');


# ### Results: Report your (joint) uncertainties on period and eccentricity

# In[ ]:

mean = sampler.flatchain.mean(0)
std = sampler.flatchain.std(0)

print("Period = {0:.0f} +/- {1:.0f} days".format(mean[0], std[0]))
print("eccentricity = {0:.2f} +/- {1:.2f}".format(mean[1], std[1]))


# In this case, a simple mean and standard deviation is probably not the best summary of the chain, because the posterior has multiple modes. We could isolate the strongest mode and then get a better estimate:

# In[ ]:

chain = sampler.flatchain[sampler.flatchain[:, 0] > 1050]

mean = chain.mean(0)
std = chain.std(0)

print("Period = {0:.0f} +/- {1:.0f} days".format(mean[0], std[0]))
print("eccentricity = {0:.2f} +/- {1:.2f}".format(mean[1], std[1]))

corner.corner(chain[:, :2], labels=params._fields[:2]);


# This is a better representation of the parameter estimates for the most prominent peak.

# ## Extra Credit
# 
# If you finish early, try tackling this...
# 
# One reason we see multiple modes in the posterior is that there are actually multiple planets in the system! In fact, the paper the above data comes from reports *two* detected planets.
# 
# Build a Bayesian model which models two planets simultaneously: can you find signals from both planets in the data?
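# As a hint for getting started, here is a minimal sketch of a two-planet radial-velocity
# model. It is an addition to this solution, and the parameter layout of ``theta2`` below
# is just one hypothetical choice: two sets of Keplerian parameters plus a shared velocity
# offset V and jitter s. The prior and likelihood would need to be extended analogously.

# In[ ]:

def radial_velocity_2(t, theta2):
    """Sum of two Keplerian signals sharing a velocity offset V.

    theta2 = [T1, e1, K1, omega1, chi1, T2, e2, K2, omega2, chi2, V, s]
    (a hypothetical parameter ordering chosen for this sketch)
    """
    T1, e1, K1, omega1, chi1, T2, e2, K2, omega2, chi2, V, s = theta2
    rv = np.zeros_like(np.asarray(t, dtype=float))
    for T, e, K, omega, chi in [(T1, e1, K1, omega1, chi1),
                                (T2, e2, K2, omega2, chi2)]:
        # same Keplerian solution as in radial_velocity above
        M = 2 * np.pi * ((t / T + chi) % 1)
        E = compute_E(M, e)
        f = 2 * np.arctan2(np.sqrt(1 + e) * np.sin(E / 2),
                           np.sqrt(1 - e) * np.cos(E / 2))
        rv = rv - K * (np.sin(f + omega) + e * np.sin(omega))
    return V + rv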