%% Machine Learning Online Class
%  Exercise 7 | Principal Component Analysis and K-Means Clustering
%
%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  exercise. You will need to complete the following functions:
%
%     pca.m
%     projectData.m
%     recoverData.m
%     computeCentroids.m
%     findClosestCentroids.m
%     kMeansInitCentroids.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%
%% Initialization
clear ; close all; clc
%% ================== Part 1: Load Example Dataset ===================
%  We start this exercise by using a small dataset that is easy to
%  visualize
%
fprintf('Visualizing example dataset for PCA.\n\n');
%  The following command loads the dataset. You should now have the
%  variable X in your environment
load ('ex7data1.mat');

%  Visualize the example dataset
plot(X(:, 1), X(:, 2), 'bo');
axis([0.5 6.5 2 8]); axis square;

fprintf('Program paused. Press enter to continue.\n');
pause;
%% =============== Part 2: Principal Component Analysis ===============
%  You should now implement PCA, a dimension reduction technique. You
%  should complete the code in pca.m
%
fprintf('\nRunning PCA on example dataset.\n\n');

%  Before running PCA, it is important to first normalize X
[X_norm, mu, sigma] = featureNormalize(X);
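%  featureNormalize.m is typically provided with the exercise; a minimal
%  sketch of what it likely does (zero mean, unit standard deviation per
%  feature) is shown here for reference -- the provided file may differ:
%
%      function [X_norm, mu, sigma] = featureNormalize(X)
%        mu = mean(X);                              % per-feature mean
%        X_norm = bsxfun(@minus, X, mu);            % subtract the mean
%        sigma = std(X_norm);                       % per-feature std deviation
%        X_norm = bsxfun(@rdivide, X_norm, sigma);  % scale to unit std deviation
%      end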
%  Run PCA
[U, S] = pca(X_norm);
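%  A minimal sketch of what pca.m might contain, assuming the standard
%  approach of taking the SVD of the covariance matrix of the normalized
%  data (your graded implementation may be organized differently):
%
%      function [U, S] = pca(X)
%        [m, n] = size(X);
%        Sigma = (X' * X) / m;      % n x n covariance matrix
%        [U, S, V] = svd(Sigma);    % columns of U are the principal components;
%      end                          % diag(S) holds the corresponding variances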
%  mu, the mean of each feature, was computed by featureNormalize above
%  Draw the eigenvectors centered at mean of data. These lines show the
%  directions of maximum variations in the dataset.
hold on;
drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2);
drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2);
hold off;

fprintf('Top eigenvector: \n');
fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1));
fprintf('\n(you should expect to see -0.707107 -0.707107)\n');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =================== Part 3: Dimension Reduction ===================
%  You should now implement the projection step to map the data onto the
%  first k eigenvectors. The code will then plot the data in this reduced
%  dimensional space. This will show you what the data looks like when
%  using only the corresponding eigenvectors to reconstruct it.
%
%  You should complete the code in projectData.m
%
fprintf('\nDimension reduction on example dataset.\n\n');
%  Plot the normalized dataset (returned from featureNormalize)
plot(X_norm(:, 1), X_norm(:, 2), 'bo');
axis([-4 3 -4 3]); axis square
%  Project the data onto K = 1 dimension
K = 1;
Z = projectData(X_norm, U, K);
fprintf('Projection of the first example: %f\n', Z(1));
fprintf('\n(this value should be about 1.481274)\n\n');
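%  A sketch of projectData.m under the usual convention that each row of
%  X_norm is an example and U comes from pca above (your implementation
%  may differ):
%
%      function Z = projectData(X, U, K)
%        U_reduce = U(:, 1:K);   % keep only the top K eigenvectors
%        Z = X * U_reduce;       % m x K matrix of projected examples
%      end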
%  Recover an approximation of the data from the 1-D projection
X_rec = recoverData(Z, U, K);
fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2));
fprintf('\n(this value should be about -1.047419 -1.047419)\n\n');
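%  recoverData.m is the (approximate) inverse of projectData; a minimal
%  sketch, assuming the same conventions as above:
%
%      function X_rec = recoverData(Z, U, K)
%        X_rec = Z * U(:, 1:K)';   % map each projection back to n dimensions
%      end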
%  Draw lines connecting the projected points to the original points
hold on;
plot(X_rec(:, 1), X_rec(:, 2), 'ro');
for i = 1:size(X_norm, 1)
    drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1);
end
hold off

fprintf('Program paused. Press enter to continue.\n');
pause;
%% =============== Part 4: Loading and Visualizing Face Data =============
%  We start the exercise by first loading and visualizing the dataset.
%  The following code will load the dataset into your environment
%
fprintf('\nLoading face dataset.\n\n');

%  Load Face dataset
load ('ex7faces.mat')

%  Display the first 100 faces in the dataset
displayData(X(1:100, :));

fprintf('Program paused. Press enter to continue.\n');
pause;
%% =========== Part 5: PCA on Face Data: Eigenfaces ===================
%  Run PCA and visualize the eigenvectors which are in this case eigenfaces
%  We display the first 36 eigenfaces.
%
fprintf(['\nRunning PCA on face dataset.\n' ...
         '(this might take a minute or two ...)\n\n']);

%  Before running PCA, it is important to first normalize X by subtracting
%  the mean value from each feature
[X_norm, mu, sigma] = featureNormalize(X);

%  Run PCA
[U, S] = pca(X_norm);

%  Visualize the top 36 eigenvectors found
displayData(U(:, 1:36)');

fprintf('Program paused. Press enter to continue.\n');
pause;
%% ============= Part 6: Dimension Reduction for Faces =================
%  Project images to the eigen space using the top k eigenvectors.
%  If you are applying a machine learning algorithm, you can use the
%  projected features instead of the original high-dimensional images.
fprintf('\nDimension reduction for face dataset.\n\n');
K = 100;
Z = projectData(X_norm, U, K);

fprintf('The projected data Z has a size of: ')
fprintf('%d ', size(Z));

fprintf('\n\nProgram paused. Press enter to continue.\n');
pause;
%% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
%  Project images to the eigen space using the top K eigenvectors and
%  visualize only using those K dimensions
%  Compare to the original input, which is also displayed
fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n');
K = 100;
X_rec = recoverData(Z, U, K);

%  Display normalized data
subplot(1, 2, 1);
displayData(X_norm(1:100,:));
title('Original faces');
axis square;

%  Display reconstructed data from only k eigenfaces
subplot(1, 2, 2);
displayData(X_rec(1:100,:));
title('Recovered faces');
axis square;

fprintf('Program paused. Press enter to continue.\n');
pause;
%% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
%  One useful application of PCA is to use it to visualize high-dimensional
%  data. In the last K-Means exercise you ran K-Means on 3-dimensional
%  pixel colors of an image. We first visualize this output in 3D, and then
%  apply PCA to obtain a visualization in 2D.
close all; clc
%  Reload the image from the previous exercise and run K-Means on it
%  For this to work, you need to complete the K-Means assignment first
A = double(imread('bird_small.png'));

%  If imread does not work for you, you can try instead
%    load ('bird_small.mat');

A = A / 255;
img_size = size(A);
X = reshape(A, img_size(1) * img_size(2), 3);
K = 16;
max_iters = 10;
initial_centroids = kMeansInitCentroids(X, K);
[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
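%  kMeansInitCentroids.m is one of the functions to complete; a common
%  implementation picks K distinct training examples at random. Shown here
%  only as a sketch -- the graded version may differ:
%
%      function centroids = kMeansInitCentroids(X, K)
%        randidx = randperm(size(X, 1));   % random permutation of example indices
%        centroids = X(randidx(1:K), :);   % use the first K examples as centroids
%      end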
%  Sample 1000 random indexes (since working with all the data is
%  too expensive). If you have a fast computer, you may increase this.
sel = floor(rand(1000, 1) * size(X, 1)) + 1;
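%  Note that the line above draws indexes with replacement; if you prefer
%  1000 distinct pixels, a sketch of an alternative would be:
%      perm = randperm(size(X, 1));
%      sel = perm(1:1000)';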
%  Setup Color Palette
palette = hsv(K);
colors = palette(idx(sel), :);

%  Visualize the data and centroid memberships in 3D
figure;
scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors);
title('Pixel dataset plotted in 3D. Color shows centroid memberships');
fprintf('Program paused. Press enter to continue.\n');
pause;

%% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
% Use PCA to project this cloud to 2D for visualization

% Subtract the mean to use PCA
[X_norm, mu, sigma] = featureNormalize(X);

% PCA and project the data to 2D
[U, S] = pca(X_norm);
Z = projectData(X_norm, U, 2);

% Plot in 2D
figure;
plotDataPoints(Z(sel, :), idx(sel), K);
title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction');
fprintf('Program paused. Press enter to continue.\n');
pause;