News

New paper in The American Naturalist!

Friday, April 20, 2018

sklearn: Linear Regression (multiple variables)



import pandas as pd
from sklearn import linear_model

# Template: fit a linear regression with several predictor columns.
# The right-hand side of `df` and the column names below are placeholders
# that must be filled in before this snippet will run.
df = # placeholder: assign your pandas DataFrame here
predictors = df[['pre1','pre2',...]] # placeholder names: list the predictor columns to use
target = df[['target']] # the response column, selected with a list of column names

X = predictors # feature matrix: one column per predictor
y = target['target'] # the single response column

lm = linear_model.LinearRegression()
# X and y must not contain NaN or infinity.
# Per the scikit-learn docs, fit() returns the fitted estimator itself,
# so `model` and `lm` refer to the same object.
model = lm.fit(X,y)

predictions = lm.predict(X) # predicted values for the same rows the model was fitted on

# R^2 (coefficient of determination): the proportion of the variance in y
# that the model explains. It is evaluated here on the training data, so it
# is not an estimate of out-of-sample performance.
lm.score(X,y)
lm.coef_ # fitted coefficients of the linear regression model
lm.intercept_ # Y-intercept of the linear regression model