from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import pandas as pd
def pcaAnalysis(n_comp, df):
    """Run PCA on the feature columns of ``df`` and show a labelled scatter plot.

    Rows with a missing value in any feature column are discarded, the
    remaining rows are projected with ``PCA(n_components=n_comp)``, and the
    first two components are plotted against each other. Each point is
    annotated with the value of the ``'average'`` column for its row. The
    figure is displayed with ``plt.show()``; nothing is returned.

    Args:
        df: pd.DataFrame containing the columns 'diameter', 'radius',
            'aveclust', 'local', 'global', 'var', 'sc' and 'average'.
            The caller's frame is not modified.
        n_comp: the number of components, passed unchanged to
            ``PCA(n_components=...)``. If the projection ends up with a
            single component, the points are drawn on the line y = 0.
    """
    features = ['diameter', 'radius', 'aveclust', 'local', 'global', 'var', 'sc']

    # how='any': drop every row that has at least one NaN in a feature column.
    complete = df.dropna(subset=features, how='any')
    projected = PCA(n_components=n_comp).fit_transform(complete[features])

    xs = projected[:, 0]
    # A one-component projection has no second column to plot against.
    ys = projected[:, 1] if projected.shape[1] > 1 else [0.0] * len(xs)

    ax = plt.subplot(111)
    ax.scatter(xs, ys)

    # Pair labels with points by position: the rows of `projected` are in the
    # same order as the rows of `complete`. A label-based lookup such as
    # complete['average'][idx] returns a Series (not a scalar) whenever the
    # index contains duplicate labels.
    for label, x, y in zip(complete['average'], xs, ys):
        ax.annotate(label, (x, y))

    plt.show()