In this problem you will use principal components analysis (PCA) and the SVD to classify a large number of images.
The code below downloads a collection of 136 images found by searching amazon.com for "teapot" in Fall 2022, shown in miniature here:
Each image is resized to a common size of $350 \times 380$, and then reshaped into a vector of $350\times380\times 3 = 399000$ numbers (the red, green, and blue intensities of each RGB color pixel), and stored in a matrix $X$ of "measurements" for 136 images: $$ X = 136\times399000 \mbox{ matrix} = \mbox{(# images)} \times \mbox{(data per image)} $$ Think of this as 136 points in a 399000-dimensional space, i.e. each image is a point in $\mathbb{R}^{399000}$!!
PCA allows us to figure out which combinations of these 399000 variables explain most of the variations, and allows us to project from $\mathbb{R}^{399000}$ to a much lower-dimensional space in order to help us classify the data and identify clusters of similar images. In this problem, you will perform the PCA clustering analysis yourself.
You will need to install some Julia packages to help extract the image data.
import Pkg
Pkg.add("ZipFile")
Pkg.add("Images")
Pkg.add("FileIO")
(or type ] add ZipFile Images FileIO at the Julia prompt).
You can then run the following code to download the data and reshape it into our $X$ matrix:
using ZipFile, Images, FileIO, Statistics, LinearAlgebra, PyPlot
# download the files as a zip archive:
imagezip = download("https://math.mit.edu/~stevenj/amazon-teapots.zip")
rows, cols = 350,380 # common size to resize all of the images to
# extract the images from the zip file, load them, resize them, and stick them into a big array Xdata
Xdata = Float64[]
r = ZipFile.Reader(imagezip)
for f in r.files
    # load image and rescale to rows x cols
    image = imresize(load(IOBuffer(read(f))), rows, cols)
    imagedata = Float64.(channelview(image))
    if length(imagedata) == rows*cols # grayscale
        imagedata = [imagedata; imagedata; imagedata] # convert to RGB
    end
    append!(Xdata, imagedata)
end
# remove duplicates and reshape into 136×399000 matrix X
Y = reshape(Xdata, :, length(r.files)) # reshape to (imagesize) x (numimages) matrix
X = reduce(hcat, unique(eachcol(Y)))' # remove duplicates and transpose
@show size(X)
# a useful function to reshape a length-399000 vector into a color image and plot it
function plotimg(imgdata, rows=rows, cols=cols)
    u = copy(imgdata)
    u .-= minimum(u)   # shift so the smallest value is 0 ...
    u ./= maximum(u)   # ... and rescale so the largest value is 1
    imshow(permutedims(reshape(u, 3, rows, cols), (2,3,1)))  # 3×rows×cols → rows×cols×3 RGB image
    axis("off")
end
size(X) = (136, 399000)
plotimg (generic function with 3 methods)
Once you have the data:
(a) As in class, subtract the mean of each column of $X$ from that column to form a new matrix $A$. (That is, subtract the mean of each pixel/color over all the images.) A = ....
Given A, compute U, σ, V = svd(A) and plot(σ, "b.-") (be sure to add a title and axes labels too) to see the singular values $\sigma$. As usual, some singular values should be much bigger than others.
(b) Compute the coefficients of the orthogonal projection of each mean-subtracted image (each row of A) onto the two dominant singular vectors _____ and _____, to obtain 136 coefficients $c_1$ and 136 coefficients $c_2$, respectively.
Plot these coefficients, along with a numeric label for the index of each image, with the following code (be sure to add axes labels and a title). You have now reduced your 399000-dimensional data into points in 2 dimensions.
figure(figsize=(20,15))
plot(c1, c2, "r.")
text.(c1, c2, string.(1:length(c1)), fontsize=11);
(c) In the 2d plot of (b), clusters of nearby points should represent similar teapots in some way. Pick three clusters of 3 nearby points each, with different clusters far from one another in the plot from (b). For each cluster, plot the 3 corresponding images (rows of X) with plotimg. For example, here is how you would plot (side-by-side) the 27th, 33rd, and 49th images:
subplot(1,3,1)
plotimg(X[27,:])
subplot(1,3,2)
plotimg(X[33,:])
subplot(1,3,3)
plotimg(X[49,:])
You should find that the nearby (clustered) teapots are much more similar to one another (in some way) than they are to the clusters far away.
We start by running the code above from the problem to set up the data. This gives us the raw data matrix X.
X
136×399000 adjoint(::Matrix{Float64}) with eltype Float64 (large output omitted; nearly all of the displayed entries are 1.0 or slightly below)
(a) There are several ways to subtract the mean from each column of $X$.
The most 18.06-style way is to use the fact that this is exactly a projection operation, as we showed in class:
$$ \boxed{A = \left( I - \frac{oo^T}{o^T o} \right) X = X - \frac{o(o^T X)}{o^T o}} $$ where $o = [1,1,\ldots,1]$ is the column vector of 136 1's, formed in Julia by ones(136). (For efficiency's sake, it is nicer to compute $o(o^T X)$ than $(oo^T)X$, since matrix–vector products are much cheaper than matrix–matrix products, but this isn't necessary.)
In Julia, this is:
o = ones(size(X,1)) # column vector of 136 1's
A = X - o * (o'X) / (o'o); # compute X - ooᵀX/oᵀo
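As an optional check of the efficiency remark above (not required for the pset), both groupings give the same matrix; the slow version explicitly forms the 136×136 matrix $oo^T$ before multiplying by $X$:
A_slow = X - (o * o') * X / (o'o)   # forms the 136×136 matrix o*o' explicitly (more expensive)
A ≈ A_slow                          # should print true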
Alternatively, we could use the mean function provided by Julia's Statistics library to take the mean of each column, and then subtract this from each column with broadcasting.
mean(X) is not what we want! That would return a single number, the mean of all the entries in X. Instead, we want mean(X, dims=1), the mean taken along the first (row) "direction", returning a row vector of the means of each column:
mean(X, dims=1) # the mean of each column of X
1×399000 Matrix{Float64}: 0.993916 0.993858 0.994089 0.993916 … 0.994867 0.994493 0.993743
We could then subtract this from each column with X .- mean(X, dims=1), which returns the same result (up to roundoff errors) as our projection above:
A ≈ X .- mean(X, dims=1)
true
(Of course, there are other more laborious ways to compute $A$. You could write your own loop, you could perform a map operation on eachcol(X), etcetera. But in 18.06, I would recommend trying to formulate things as linear-algebra operations if you can!)
A ≈ reduce(hcat, map(col -> col .- mean(col), eachcol(X))) # yet another solution, even more convoluted
true
Now we compute the SVD of $A$ and plot the singular values.
We see that the first few singular values are much bigger than the others, as we often find! On the other hand, the contrast between $\sigma_1$ and the smaller singular values is only about a factor of 10, which tells us that these images are not really low-dimensional: a lot of things are going on. Still, hopefully the largest singular values will tell us something useful about the images.
U, σ, V = svd(A);
plot(σ, "b.-")
title("Problem 1(a): Singular values of A");
xlabel(L"k");
legend([L"\sigma_k"]);
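As an optional sanity check on the statements above (not required by the problem), we can look at the relative sizes of the leading singular values and at the fraction of $\Vert A \Vert_F^2 = \sum_k \sigma_k^2$ captured by the first two:
@show σ[1:5] ./ σ[1]              # relative sizes of the leading singular values
@show sum(σ[1:2].^2) / sum(σ.^2)  # fraction of ‖A‖F² captured by σ₁ and σ₂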
(b) Here, we want to project onto $\boxed{v_1, v_2}$, the first two right singular vectors (v's) of $A$.
Remember, the u's (left singular vectors) are a basis for the columns (column space) of $A$, and the v's are a basis for the rows (row space) of $A$. Here, each row is an image, so a basis for the images will be the v's, not the u's. (The u's have the wrong shape! They have only 136 components each!)
Also, we saw in class that, for a matrix organized like this (rows = different samples, columns = different variables), the v's give us uncorrelated combinations of variables, and the largest σ's correspond to the dominant uncorrelated combinations of variables. This is the whole point of PCA: the v's give us the right way to look at the input measurements.
(Note that some authors transpose their data — they use a matrix X whose rows are the variables and whose columns are the samples, in which case the roles of rows/columns and u's/v's are swapped.)
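We can see this concretely (an optional check, not required) just by looking at the shapes returned by svd:
@show size(U)   # 136×136: columns of U live in ℝ¹³⁶ (one entry per image)
@show size(V)   # 399000×136: columns of V live in ℝ³⁹⁹⁰⁰⁰ (one entry per pixel/color)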
Moreover, remember the distinction between the projection and the coefficient of the projection. We want to write each image in the $v$ basis as: $$ \mbox{image} = \underbrace{v_1 \mbox{(coefficient 1)}}_{v_1\mbox{ projection}} + \underbrace{v_2 \mbox{(coefficient 2)}}_{v_2\mbox{ projection}} + \cdots $$ and what we want to know here is not the projection but rather the coefficients of the projection: the coefficient $c_1$ of each image in the $v_1$ direction and the coefficient $c_2$ of each image in the $v_2$ direction. These are the "coordinates" of the images in the $(v_1, v_2)$ basis suggested to us by PCA.
Hence, we want $$ \boxed{c_1 = A v_1, \; c_2 = A v_2} $$ which are the dot products of $v_1$ and $v_2$ (respectively) with each row of A (each mean-subtracted image). In Julia, this is simply:
# the dominant two right-singular vectors are just the 1st two columns of V:
v1 = V[:, 1]
v2 = V[:, 2]
# now we want the dot product of each image with these directions:
c1 = A*v1
c2 = A*v2;
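As an optional cross-check (not required), since $A = U\Sigma V^T$ with orthonormal v's, we have $Av_1 = \sigma_1 u_1$ and $Av_2 = \sigma_2 u_2$, so the same coefficients can also be read off from $U$ and $\sigma$:
@show c1 ≈ σ[1] * U[:,1]   # should print true
@show c2 ≈ σ[2] * U[:,2]   # should print true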
The general idea is that the numbers in $c_1$ and $c_2$ approximate the full information in $A$, drastically compressing the amount of data. Now we plot this compressed data in the plane, hoping to notice some patterns in our data.
figure(figsize=(20,15))
plot(c1, c2, "r.")
text.(c1, c2, string.(1:length(c1)), fontsize=12);
title("Problem 1(b): Compressed data, visualized on a plane", fontsize=20);
xlabel(L"$c_1$", fontsize=20);
ylabel(L"$c_2$", fontsize=20);
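As an optional illustration of what these two numbers "compress" (not required by the problem; image 27 is an arbitrary choice), we can rebuild a rank-2 approximation of one image from its two coefficients and compare it to the original:
xmean = vec(mean(X, dims=1))           # the mean image we subtracted in part (a)
i = 27                                  # an arbitrary image index
approx = xmean + c1[i]*v1 + c2[i]*v2    # mean + c₁v₁ + c₂v₂: rank-2 approximation of image i
subplot(1,2,1); plotimg(X[i,:]); title("original")
subplot(1,2,2); plotimg(approx); title("rank-2 approximation")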
(c) There are many possible choices of clusters. For this solution we pick a small cluster on the right (points 105, 109, 116), a cluster on the top around the point $(0, 75)$ (points 73, 103, 84) and a cluster on the left around the point $(-70, -15)$ (points 122, 82, 10). Below are the triples of pictures for these clusters.
subplot(1,3,1)
plotimg(X[105,:])
subplot(1,3,2)
plotimg(X[109,:])
subplot(1,3,3)
plotimg(X[116,:])
gcf().suptitle("Problem 1(c): first cluster 105,109,116", fontsize=14, y=0.75)
PyObject Text(0.5, 0.75, 'Problem 1(c): first cluster 105,109,116')
subplot(1,3,1)
plotimg(X[73,:])
subplot(1,3,2)
plotimg(X[103,:])
subplot(1,3,3)
plotimg(X[84,:])
gcf().suptitle("Problem 1(c): second cluster 73,103,84", fontsize=14, y=0.75)
PyObject Text(0.5, 0.75, 'Problem 1(c): second cluster 73,103,84')
subplot(1,3,1)
plotimg(X[122,:])
subplot(1,3,2)
plotimg(X[82,:])
subplot(1,3,3)
plotimg(X[10,:])
gcf().suptitle("Problem 1(c): third cluster 122,82,10", fontsize=14, y=0.75)
PyObject Text(0.5, 0.75, 'Problem 1(c): third cluster 122,82,10')
We can observe that the pictures are indeed similar within clusters and have distinctive features separating them from the other clusters. For example, all pictures in the second cluster feature a significant amount of amber color, with a roundish teapot shape.
We can also see some imperfect results: the second picture in the first cluster is there because of the gray background, not because of the teapot itself. (Image classification is hard, and one has to be especially careful of effects due to simple changes in lighting or subject framing!)
(from Strang pset 5.1)
If a $4\times 4$ matrix has $\det A = \frac{1}{2}$, find $\det(2A)$, $\det(-A)$, $\det(A^2)$, and $\det(A^{-1})$.
Recall the following properties of determinants:
For an $n\times n$ matrix $A$ and an arbitrary scalar $\lambda$ we have $\det(\lambda A) = \lambda^n \det A$. More precisely: to get from $A$ to $\lambda A$ we need to multiply each row of $A$ by $\lambda$. Each such multiplication multiplies the determinant by $\lambda$, so $n$ rows result in a factor of $\lambda^n$ for the determinant.
$\det (AB)=\det(A) \det(B)$. In particular, $\det(A)\det(A^{-1})=\det(AA^{-1})=\det(I)=1$, so $\det(A^{-1})=\det(A)^{-1}$.
Now we can solve the problem. Recall that $A$ has size $4\times 4$ and $\det(A)=\frac12$.
$$ \boxed{\det(2A)=2^4\det(A)=8;}\qquad \boxed{\det(-A)=(-1)^4\det(A)=\frac12;}\qquad\boxed{\det(A^2)=\det(A)^2=\frac14;}\qquad\boxed{\det(A^{-1})=\det(A)^{-1}=2.} $$
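As an optional numerical check (not part of Strang's problem), we can verify these determinant identities in Julia for a random $4\times 4$ matrix (its determinant won't be exactly $\frac12$, but the identities hold for any invertible $A$):
using LinearAlgebra
A = randn(4,4)                      # a random 4×4 matrix (hypothetical example)
@show det(2A) ≈ 2^4 * det(A)        # det(λA) = λⁿ det(A)
@show det(-A) ≈ det(A)              # (-1)⁴ = 1
@show det(A^2) ≈ det(A)^2           # det(AB) = det(A)det(B)
@show det(inv(A)) ≈ 1/det(A)        # det(A⁻¹) = det(A)⁻¹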
(from Strang pset 5.1)
True or false (give a reason if true or a $2\times 2$ counter-example if false), using the properties of determinants. $A$ and $B$ are square matrices.
(a) If $A$ is not invertible then $AB$ is not invertible.
(b) The determinant of $A$ is always the product of its pivots.
(c) $\det(A-B)$ always equals $\det(A) - \det(B)$.
(d) $AB$ and $BA$ must have the same determinant.
(a) True. Recall that a matrix $M$ is invertible if and only if $\det(M)\neq 0$. If $A$ is not invertible, then $\det(A)=0$, hence $\det(AB)=\det(A)\det(B)=0$ and so $AB$ is also not invertible.
(b) False.
The term "pivots of A" can sometimes be used a bit loosely, but whichever interpretation you use gives the same answer. Suppose we do Gaussian elimination on $A$ and get an upper-triangular matrix $U$. Then
If one interprets the "pivots of A" as simply the diagonal elements of $U$, then their product is almost but not quite the determinant: it has the right magnitude, but the sign may be flipped if there was an odd number of row swaps. For example, with $A=\begin{pmatrix}0 & 1\\ 1& 0\end{pmatrix}$ you are forced to do a row swap during elimination, and then just obtain $U=I$ with $\det U = 1$. But $\det A = -1$ here.
To be more precise, however, we usually don't count a diagonal entry of $U$ as a "pivot" if it is zero: the "pivots" are the first nonzero entry in each row of $U$. With this definition, the "product of the pivots" is always nonzero, but the determinant is zero if the matrix is singular (if there aren't enough pivots). For example, the matrix $A=\begin{pmatrix}0 & 1\\ 0& 0\end{pmatrix}$ has only one pivot equal to $1$, but $\det(A)=0$.
Of course, by choosing to do different permutations of the rows of $A$, we could get many different $U$ matrices from elimination, and hence many different possible values for the individual pivots and/or diagonal entries. But the amazing fact is that, even though the matrix $U$ and the values of the diagonal entries of $U$ may change depending on how one permutes the rows, the product of the diagonal entries of $U$ will always equal $\pm \det A$, with the sign determined by the number of row swaps!
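As an optional illustration (not required), Julia's lu computes exactly such a factorization $PA = LU$ with row swaps recorded in $P$, and the $2\times 2$ example above shows the sign flip:
using LinearAlgebra
A = [0 1; 1 0.0]
F = lu(A)               # LU with partial pivoting: one row swap is needed here
@show prod(diag(F.U))   # product of the diagonal of U is +1 ...
@show det(A)            # ... but det(A) = -1, because of the single row swap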
(c) False. For example, take $A=\begin{pmatrix}1 & 0\\ 0& 1\end{pmatrix}$ and $B=\begin{pmatrix}0 & 0\\ 0& 1\end{pmatrix}$. Then $A-B=\begin{pmatrix}1 & 0\\ 0& 0\end{pmatrix}$ and $\det(A-B)=0$, but $\det(A)-\det(B)=1-0=1$.
(d) True. $\det(AB)=\det(A)\det(B)=\det(B)\det(A)=\det(BA)$.
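Optionally, we can sanity-check (a) and (d) numerically with random matrices (a quick check, not part of the required answer):
using LinearAlgebra
A = randn(3,3)
B = randn(3,3)
@show det(A*B) ≈ det(B*A)          # part (d): always true
Asing = randn(3,2) * randn(2,3)    # a singular (rank ≤ 2) 3×3 matrix
@show det(Asing * B)               # part (a): ≈ 0 up to roundoff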
(a) If $Q$ is a unitary matrix, from the properties of determinants explain why $\det Q$ must be ________ or ________.
(b) If $P$ is a $3 \times 3$ projection matrix onto a 2d subspace, then explain why its determinant must be ________.
(c) If $A$ is a $5 \times 5$ matrix that is anti-symmetric ($A^T = -A$), then explain why its determinant must be ________. Try it for a few random matrices: B = randn(5,5); A = B - B' produces an anti-symmetric A; check det(A). (Be sure to load using LinearAlgebra first to get the det function.)
(a) $\det Q$ must be $\boxed{1}$ or $\boxed{-1}$. Recall that a unitary matrix $Q$ is a matrix such that $Q^T=Q^{-1}$. Taking determinants of both sides and using properties of determinants, we have $$ \det(Q)=\det(Q^T)=\det(Q^{-1})=\frac{1}{\det(Q)}. $$ Hence $\det(Q)^2=1$ which implies $\det(Q)=\pm1$.
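As an optional numerical check (not required), the Q factor from a QR factorization of a random real matrix is orthogonal (hence unitary), so its determinant should come out as $\pm 1$:
using LinearAlgebra
Q, R = qr(randn(5,5))     # Q is a (real) unitary = orthogonal matrix
@show det(Matrix(Q))      # should be ≈ +1 or -1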
(b) The determinant of $P$ must be $\boxed{0}$. Note that the matrix $P$ is not invertible: it projects onto a 2-dimensional subspace, so $C(P)$ has dimension $2$ and $P$ does not have full rank. Since $P$ is not invertible, $\det P=0$.
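As an optional check (not required), we can build a projection onto a random 2d subspace of $\mathbb{R}^3$ using the projection formula $P = M(M^TM)^{-1}M^T$ (here M is a hypothetical random basis matrix) and confirm that its determinant vanishes up to roundoff:
using LinearAlgebra
M = randn(3,2)               # a basis for a random 2d subspace
P = M * ((M'M) \ M')         # projection matrix onto C(M)
@show det(P)                 # ≈ 0 up to roundoff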
(c) $\det A$ must be $\boxed{0}$. Recall that for a $5\times 5$ matrix $A$ we have $\det(-A)=(-1)^5\det(A)=-\det(A)$. So, if $A^T=-A$, we have $$ \det(A)=\det(A^T)=\det(-A)=-\det(A), $$ which is only possible if $\det(A)=0$. The code to try this for a few random matrices is shown below:
using LinearAlgebra
for i = 1:10
    B = randn(5,5)
    A = B - B'      # (B - B')ᵀ = Bᵀ - B = -(B - B'), so A is anti-symmetric
    @show det(A)
end
det(A) = 1.21069476712944e-16
det(A) = -3.3264036182300682e-15
det(A) = -1.5275124982741926e-15
det(A) = -5.311646714207018e-18
det(A) = 1.0363047720795372e-16
det(A) = 0.0
det(A) = 3.4289498760725383e-15
det(A) = -4.040333952221032e-15
det(A) = -2.106038509542208e-14
det(A) = 2.0857732790276202e-16
As predicted, the determinant is essentially zero — up to roundoff errors. (In principle, all of the terms in the determinant should cancel exactly, but due to the finite precision of computer arithmetic there is usually a tiny leftover.)
Find the determinant of the $6 \times 6$ matrix: $$ A = \begin{pmatrix} 1 & -1 & 0 & 0 & 0 & 0\\ -1 & 2 & -1 & 0 & 0 & 0\\ 0 & -1 & 2 & -1 & 0 & 0\\ 0 & 0 & -1 & 2 & -1 & 0\\ 0 & 0 & 0 & -1 & 2 & -1\\ 0 & 0 & 0 & 0 & -1 & 2\\ \end{pmatrix} \, . $$
To find the determinant of $A$ we perform Gaussian elimination. Recall that the row operations used in elimination (adding a multiple of one row to another) do not change the determinant, and row swaps only change its sign, so $\det A$ will be equal to the determinant of the result of the elimination.
The elimination is provided below. It turns out to be fairly simple: at each step we just add one row to a row below, starting from the top row.
$$ \underbrace{ \begin{pmatrix} 1&-1&0&0&0&0\\ -1&2&-1&0&0&0\\ 0&-1&2&-1&0&0\\ 0&0&-1&2&-1&0\\ 0&0&0&-1&2&-1\\ 0&0&0&0&-1&2 \end{pmatrix}}_A \to \begin{pmatrix} 1&-1&0&0&0&0\\ 0&1&-1&0&0&0\\ 0&-1&2&-1&0&0\\ 0&0&-1&2&-1&0\\ 0&0&0&-1&2&-1\\ 0&0&0&0&-1&2 \end{pmatrix}\to \begin{pmatrix} 1&-1&0&0&0&0\\ 0&1&-1&0&0&0\\ 0&0&1&-1&0&0\\ 0&0&-1&2&-1&0\\ 0&0&0&-1&2&-1\\ 0&0&0&0&-1&2 \end{pmatrix}\to \begin{pmatrix} 1&-1&0&0&0&0\\ 0&1&-1&0&0&0\\ 0&0&1&-1&0&0\\ 0&0&0&1&-1&0\\ 0&0&0&-1&2&-1\\ 0&0&0&0&-1&2 \end{pmatrix}\to \begin{pmatrix} 1&-1&0&0&0&0\\ 0&1&-1&0&0&0\\ 0&0&1&-1&0&0\\ 0&0&0&1&-1&0\\ 0&0&0&0&1&-1\\ 0&0&0&0&-1&2 \end{pmatrix}\to \underbrace{\begin{pmatrix} 1&-1&0&0&0&0\\ 0&1&-1&0&0&0\\ 0&0&1&-1&0&0\\ 0&0&0&1&-1&0\\ 0&0&0&0&1&-1\\ 0&0&0&0&0&1 \end{pmatrix}}_U $$ So we get $$ \det A=\det U = \det\begin{pmatrix} 1&-1&0&0&0&0\\ 0&1&-1&0&0&0\\ 0&0&1&-1&0&0\\ 0&0&0&1&-1&0\\ 0&0&0&0&1&-1\\ 0&0&0&0&0&1 \end{pmatrix}=\boxed{1}. $$ For the last equality we have remembered that the determinant of an upper-triangular matrix is equal to the product of the diagonal entries. (And no row swaps were required, so $\det A = \det U$.)
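As an optional numerical check (not part of the original problem), we can build this tridiagonal matrix in Julia and confirm the determinant:
using LinearAlgebra
A = diagm(0 => [1; fill(2, 5)],      # diagonal: 1, 2, 2, 2, 2, 2
          1 => fill(-1, 5),          # superdiagonal: all -1
         -1 => fill(-1, 5))          # subdiagonal: all -1
@show det(A)                         # should print ≈ 1.0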