'''lecture16_linear_regression.py
Demo of using Numpy and SciPy to compute linear regression, R^2, and p-value
of slope test vs. slope = 0
Using diamond dataset
Oliver W. Layton
CS251: Data Analysis and Visualization
Spring 2019
'''
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("darkgrid")

# Load diamond data (seaborn example dataset, returned as a pandas DataFrame)
diamonds = sns.load_dataset('diamonds')

# Randomly sample 500 points (unseeded, so every run draws a different sample)
sample = diamonds.sample(n=500)

# Do the linear regression using
# x: 'carat'
# y: 'price'
x = sample['carat'].to_numpy()
y = sample['price'].to_numpy()
fit = stats.linregress(x, y)
# m: slope, b: intercept, r: correlation coefficient,
# p: p-value of the test whose null hypothesis is slope = 0
m, b, r, p = fit.slope, fit.intercept, fit.rvalue, fit.pvalue

# Plot data sample
plt.scatter(x, y, alpha=0.5)

# superimpose linear regression line
# (a straight line only needs its two endpoints over the sampled carat range)
line_x = np.array([x.min(), x.max()])
plt.plot(line_x, m * line_x + b, color='red')

# Plot styling stuff
# '+' sign flag on the intercept avoids rendering "x+-123.4" when b is negative
plt.title(f'y = {m:.1f}x{b:+.1f}')
plt.xlabel('Diamond carat')
plt.ylabel('Price ($)')
# NOTE(review): text positions are in data coordinates (carat, price), so both
# labels sit at price = $15, i.e. near the bottom of the axes -- confirm intended.
plt.text(1, 15, f'$R^2$ = {r**2:.2f}', fontsize=15)
plt.text(2, 15, f'$p$ = {p:.2}', fontsize=15)

# Show us the plot in a pop-up window
plt.show()