Solution
Ximi answered on Dec 02 2021
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Problem 1\n",
"data = [\n",
" [0, 1],\n",
" [1, 1],\n",
" [2, 1],\n",
" [2, 3],\n",
" [3, 2],\n",
" [3, 3],\n",
" [4, 5],\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Standardizing the data as per requirement\n",
"scaled_data = StandardScaler().fit_transform(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pca = PCA(n_components=2)\n",
"pca.fit(scaled_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pca.explained_variance_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Explained variance\n",
"pca.explained_variance_ratio_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Transformed data \n",
"pca.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Applying on non-standardized data \n",
"pca = PCA(n_components=2)\n",
"pca.fit(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pca.explained_variance_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pca.explained_variance_ratio_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Transformed data\n",
"pca.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"n_digits: 10, \t n_samples 1797, \t n_features 64\n",
"__________________________________________________________________________________\n",
"init\t\ttime\tinertia\thomo\tcompl\tv-meas\tARI\tAMI\tsilhouette\n",
"k-means++\t0.17s\t69432\t0.602\t0.650\t0.625\t0.465\t0.621\t0.146\n",
"random \t0.16s\t69694\t0.669\t0.710\t0.689\t0.553\t0.686\t0.147\n",
"PCA-based\t0.03s\t70804\t0.671\t0.698\t0.684\t0.561\t0.681\t0.118\n",
"__________________________________________________________________________________\n"
]
},
{
"data": {
"text/plain": [
"