News

New paper in the American Naturalist!

Friday, January 5, 2018

PCA first trial



from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import pandas as pd

def pcaAnalysis(n_comp, df):
    """Run PCA on the feature columns of *df* and plot the first two components.

    Rows with a missing value in any feature column are dropped, the
    remaining rows are projected with ``PCA(n_components=n_comp)``, and the
    first two components are drawn as a scatter plot in which every point is
    annotated with that row's ``'average'`` value. The figure is displayed
    with ``plt.show()``; nothing is returned.

    Args:
        n_comp: the number of components, forwarded to ``PCA``. The plot
            uses components 0 and 1, so at least two are required.
        df: pd.DataFrame holding the columns 'diameter', 'radius',
            'aveclust', 'local', 'global', 'var', 'sc' and 'average'.

    Raises:
        ValueError: if fewer than two components are requested or produced.
    """
    # Fail fast with a clear message instead of an IndexError on X[:, 1]
    # after the (potentially expensive) fit.
    if isinstance(n_comp, int) and n_comp < 2:
        raise ValueError(
            "pcaAnalysis plots the first two components; "
            "n_comp must be at least 2, got %r" % (n_comp,)
        )

    # Single source of truth for the columns used both to filter and to fit.
    features = ['diameter', 'radius', 'aveclust', 'local', 'global', 'var', 'sc']

    # how='any': drop a row as soon as at least one feature value is NaN.
    df = df.dropna(subset=features, how='any')

    pca = PCA(n_components=n_comp)
    X = pca.fit_transform(df[features])

    # Non-integer n_comp values are resolved by PCA during fitting, so the
    # number of resulting components can only be checked here.
    if X.shape[1] < 2:
        raise ValueError(
            "PCA produced %d component(s); at least 2 are needed to plot"
            % X.shape[1]
        )

    ax = plt.subplot(111)
    ax.scatter(X[:, 0], X[:, 1])
    # Pair labels with points by position: rows of X follow the row order of
    # the filtered frame, and this stays correct even if the index contains
    # duplicate labels (label-based lookup would return a Series there).
    for txt, x, y in zip(df['average'], X[:, 0], X[:, 1]):
        ax.annotate(txt, (x, y))
    plt.show()