{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import math as m\n", "import sys\n", "import numpy as np\n", "import scipy as sp\n", "import scipy.io.wavfile as wavfile\n", "import scipy.fftpack as spfft\n", "from imageio import imread\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import matplotlib.animation as anim\n", "from mpl_toolkits.mplot3d import Axes3D\n", "import IPython\n", "import IPython.display as ipd\n", "import ipywidgets as ipw" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.3: Exemplo de compressão 1\n", "## Gráfico da função $f(t)=(t-t^2)^2$ e sua DFT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Função auxiliar para plotar uma função f(t) e seu espectro de energia, centralizado no 0\n", "def plot_func_fft(t,f):\n", " N = len(t)\n", " plt.plot(t,f)\n", " plt.show()\n", " X = np.fft.fft(f)\n", " X_power = 1/N * abs(X) ** 2\n", " plt.plot(np.arange(N) - N//2, np.roll(X_power, N//2))\n", " plt.show()\n", " return X" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t = np.linspace(0, 1, 256)\n", "f = (t - t**2)**2\n", "X = plot_func_fft(t,f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tabela 3.1: eficiência da compressão e distorção" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# várias funções auxiliares...\n", "\n", "def distortion(f,y):\n", " return np.linalg.norm(f-y) **2 / np.linalg.norm(f) **2\n", "\n", "def efficiency(X,c):\n", " return sum(above(X,c)) / len(above(X,c))\n", "\n", "def above(X,c):\n", " M = max(abs(X))\n", " return abs(X) > c * M\n", "\n", "def thresholding(f, X, C):\n", " P = []\n", " D = []\n", " for c in C:\n", " Y = X * above(X,c)\n", " y = np.real(np.fft.ifft(Y))\n", " P.append(efficiency( X, c ))\n", " D.append(distortion( f, y ))\n", " return (P, D)\n", "\n", "def plot_threshold(C,P,D):\n", " count = np.arange( len(C) )\n", " fig, ax = plt.subplots()\n", " ax.plot(count, C, label='c')\n", " ax.plot(count, P, label='P(c)')\n", " ax.plot(count, D, label='D(c)')\n", " ax.legend(loc='upper center')\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# C representa o conjunto de limiares para eliminação de coeficientes \"pequenos\" do espectro\n", "C = [0.5, 0.1, 0.03, 0.02, 0.01, 0.005]\n", "(P, D) = thresholding(f, X, C)\n", "print(\"c\\tP(c)\\t\\tD(c)\\n--------------------------------\")\n", "for i in range( len(C) ):\n", " print(\"%.3f\\t%f\\t%f\" % (C[i], P[i], D[i]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " ## Alternativa à tabela 3.1: expressa c, P(c) e D(c) em forma de gráfico" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = 10** np.linspace(-0.3,-8, 50)\n", "(P, D) = thresholding(f, X, C)\n", "plot_threshold(C,P,D)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.4: Exemplo de compressão 2\n", "## Função degrau e sua DFT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "N = 256\n", "t = np.linspace(0, 1, N)\n", "f = np.zeros(N)\n", "f[0 : int(np.floor(0.2 * N+1))] += 1\n", "\n", "X = plot_func_fft(t,f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tabela 3.2: compressão e distorção para o exemplo 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = [0.5, 0.1, 0.03, 0.02, 0.01, 0.005]\n", "(P, D) = thresholding(f, X, C)\n", "print(\"c\\tP(c)\\t\\tD(c)\\n--------------------------------\")\n", "for i in range( len(C) ):\n", " print(\"%.3f\\t%f\\t%f\" % (C[i], P[i], D[i]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compressão e distorção em forma de gráfico" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = 10** np.linspace(-0.3,-8, 50)\n", "(P, D) = thresholding(f, X, C)\n", "plot_threshold(C,P,D)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.5: Reconstrução da função usando compressão de 90%\n", "#### o valor do limiar abaixo foi escolhido \"a olho\" para produzir o valor de P(c) aproximadamente 0.1 (10% do tamanho do arquivo original)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "c = 7.5e-2\n", "Y = X * above(X,c)\n", "y = np.real(np.fft.ifft(Y))\n", "print(\"P(%f) = %f\\n\" % (c, efficiency(X,c)))\n", "print(\"D(%g) = %g\\n\" % (c, distortion(f,y)))\n", "\n", "plt.plot(t,f,t,y);\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exemplo de compressão 3: função identidade $(\\;f(t)=t\\;)$ e sua DFT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "N = 256;\n", "t = np.linspace(0, 1, N)\n", "f = t\n", "\n", "X = plot_func_fft(t,f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tabela 3.3: compressão e distorção para $\\ f(t)=t$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = [0.5, 0.1, 0.03, 0.02, 0.01, 0.005]\n", "(P, D) = thresholding(f, X, C)\n", "print(\"c\\tP(c)\\t\\tD(c)\\n--------------------------------\")\n", "for i in range( len(C) ):\n", " print(\"%.3f\\t%f\\t%f\" % (C[i], P[i], D[i]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tabela 3.3 na forma de gráfico" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = 10 ** np.linspace(-0.3, -8, 50);\n", "(P, D) = thresholding(f, X, C)\n", "plot_threshold(C,P,D)\n", "\n", "# Guarda os vetores P e D para comparar\n", "# com DCT (Figura 3.10)\n", "PFFT=P\n", "DFFT=D" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.6: Reconstrução da função usando compressão de 90%\n", "#### o valor do limiar abaixo foi escolhido \"a olho\" para produzir o valor de P(c) aproximadamente 0.1 (10% do tamanho do arquivo original)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "c = 2.5e-2\n", "Y = X * above(X,c)\n", "y = np.real(np.fft.ifft(Y))\n", "print(\"P(%f) = %f\\n\" % (c, efficiency(X,c)))\n", "print(\"D(%g) = %g\\n\" % (c, distortion(f,y)))\n", "\n", "plt.plot(t,f,t,y);\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.7: Usa o espectro original para sintetizar 3 períodos da forma de onda\n", "#### basta criar um vetor 3 vezes maior e espaçar os coeficientes do espectro original de 3 em 3, o que corresponde a multiplicar a frequência fundamental por 3." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Y3 = np.zeros(3 * N, dtype=complex)\n", "Y3[0 : 3*N : 3 ] = Y\n", "y3 = np.real(np.fft.ifft(Y3))\n", "\n", "plt.plot(np.linspace(-1, 2, len(y3)), y3)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Motivação para DCT: espelhamento da função original\n", "#### essa operação elimina a descontinuidade na fronteira da janela." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tt = np.linspace(0, 2 , 2*N)\n", "ff = np.zeros(2*N)\n", "ff[0:N] = f\n", "ff[N:] = np.flip(f,0)\n", "\n", "plt.plot(tt,ff)\n", "plt.show()\n", "XX = np.fft.fft(ff);\n", "\n", "M=max(abs(XX));" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reconstrução de $\\;f(t)=t\\;$ (espelhada) usando o mesmo limiar c anterior\n", "#### porém com muito mais compressão e muito mais qualidade (menor distorção)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "c = 2.5e-2\n", "YY = XX * above(XX,c)\n", "yy = np.real(np.fft.ifft(YY))\n", "print(\"P(%f) = %f\" % (c , efficiency(XX, c) ))\n", "print(\"D(%f) = %f\\n\" % (c, distortion(ff, yy)))\n", "\n", "plt.plot(tt,ff,tt,yy)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reconstrução de $\\;f(t)=t\\;$ (espelhada) usando um limiar c 100x menor" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "c = 2.5e-4\n", "YY = XX * above(XX,c)\n", "yy = np.real(np.fft.ifft(YY))\n", "print(\"P(%f) = %f\" % (c, efficiency(XX, c)))\n", "print(\"D(%f) = %f\" % (c, distortion(ff, yy)))\n", "\n", "plt.plot(tt,ff,tt,yy)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.8: Funções básicas usadas na DCT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "N=256;\n", "t = np.arange(N)\n", "\n", "for k in range(10):\n", " #rgbcolor = int8([bitget(k+1,1) bitget(k+1,2) bitget(k+1,3)]);\n", " plt.plot(t, m.sqrt(2/N)*np.cos(m.pi*k*t/N))\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Repetição dos exemplos anteriores usando DCT ao invés de DFT" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exemplo 1: Função $f(t) = (t-t^2)^2$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# função auxiliar\n", "def plot_func_dct(t,f):\n", " N = len(t)\n", " X = spfft.dct(f, norm='ortho')\n", "\n", " plt.plot(t,f)\n", " plt.show()\n", " plt.plot(np.arange(N),abs(X)**2/N)\n", " plt.show()\n", " return X" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t = np.linspace(0, 1, 256)\n", "f = (t - t**2) **2\n", "X = plot_func_dct(t,f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Gráfico correspondente à Tabela 3.4: compressão e distorção para $f(t)=(t-t^2)^2$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# função auxiliar\n", "def threshold_dct(f, X, C):\n", " M = max(abs(X))\n", " L = len(C)\n", " P=[]\n", " D=[]\n", " for c in C:\n", " Y=X * (abs(X) > c*M)\n", " y = np.real(spfft.idct(Y, norm='ortho'))\n", " P.append(efficiency(X, c))\n", " D.append( distortion(f,y) )\n", " return P, D" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = 10**np.linspace(-0.3, -8, 50)\n", "(P, D) = threshold_dct(f, X, C)\n", "plot_threshold(C,P,D)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exemplo 2: Função degrau" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "N = 256\n", "t = np.linspace(0, 1, N);\n", "f = np.zeros(N)\n", "f[0 : int(np.floor(0.2 * N+1))] += 1\n", "\n", "X = plot_func_dct(t,f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Gráfico correspondente à Tabela 3.5 para a função degrau" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = 10 ** np.linspace(-0.3, -8, 50)\n", "(P, D) = threshold_dct(f, X, C)\n", "plot_threshold(C,P,D)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reconstrução da função degrau usando DCT com compressão de 90%\n", "#### o valor do limiar abaixo foi escolhido \"a olho\" para produzir o valor de P(c) aproximadamente 0.1 (10% do tamanho do arquivo original)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "c = 4.1e-2\n", "Y = X * above(X,c)\n", "y = np.real(spfft.idct(Y, norm='ortho'))\n", "print(\"P(%f) = %f\" % ( c, efficiency(X,c)))\n", "print(\"D(%f) = %f\" % ( c, distortion(f,y)))\n", "\n", "plt.plot(t,f,t,y)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exemplo 3: Função $\\ f(t) = t$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "N=256;\n", "t = np.linspace(0,1,N);\n", "f = t;\n", "\n", "X = plot_func_dct(t,f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Gráfico correspondente à Tabela 3.6 para $f(t)=t$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "C = 10 ** np.linspace(-0.3,-8,50)\n", "(P, D) = threshold_dct(f, X, C)\n", "plot_threshold(C,P,D)\n", "PDCT = P\n", "DDCT = D" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reconstrução da função $\\ f(t)=t$ usando DCT com compressão de 90%\n", "#### o valor do limiar abaixo foi escolhido \"a olho\" para produzir o valor de P(c) aproximadamente 0.1 (10% do tamanho do arquivo original)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "c = 2e-4\n", "Y = X * above(X,c)\n", "y = np.real(spfft.idct(Y, norm='ortho'))\n", "print(\"P(%f) = %f\\n\" % (c, efficiency(X,c)))\n", "print(\"D(%g) = %g\\n\" % (c, distortion(f,y)))\n", "\n", "plt.plot(t,f,t,y);\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.10: $\\log(D(c))$ versus $P(c)$ para a compressão da função $\\ f(t)=t$\n", "# usando FFT (linha superior) e DCT (linha inferior)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X = np.fft.fft(f)\n", "C = 10 ** np.linspace(-0.3, -8, 50)\n", "(P, D) = thresholding(f, X, C)\n", "\n", "PFFT=P\n", "DFFT=D\n", "\n", "plt.plot(PFFT[1:14], np.log10(DFFT[1:14]))\n", "plt.plot(PDCT[1:42], np.log10(DDCT[1:42]))\n", "\n", "plt.title(\"Compressao usando FFT e DCT\")\n", "plt.xlabel(\"P(c)\")\n", "plt.ylabel(\"log10(D(c))\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.11: reflexões em 2D (elimina descontinuidades de borda)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# função auxiliar\n", "def rgb2gray(rgb):\n", " fil = [0.299, 0.587, 0.144]\n", " return np.dot(rgb, fil)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "M = rgb2gray(imread(\"double_ferris.jpg\"))\n", "\n", "MM_upper = np.hstack((M, np.flip(M,1)))\n", "MM = np.vstack((MM_upper, np.flip(MM_upper,0)))\n", "\n", "plt.imshow(MM, cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Transformada DCT em blocos" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.12: Imagem original, DCT e DCT em blocos de 8x8 pixels" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Recorta a figura um pouco para obter divisibilidade por unidades de 8x8 pixels\n", "My, Mx = M.shape\n", "divsize = 8\n", "new_My = My - My % divsize\n", "new_Mx = Mx - Mx % divsize\n", "M=M[:new_My, :new_Mx]\n", "\n", "My,Mx = M.shape\n", "plt.imshow(M, cmap='gray')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# calcula a DCT 2D fazendo a DCT 1D das linhas e das colunas\n", "def dct_2d(m):\n", " D1 = spfft.dct(m.T, norm='ortho')\n", " D2 = spfft.dct(D1.T, norm='ortho')\n", " return D2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "N = dct_2d(M)\n", "\n", "Nlog = np.log(1 + abs(N))\n", "\n", "plt.imshow(Nlog / np.amax(Nlog), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MBAK = M\n", "NBAK = N\n", "\n", "Mdiv = np.empty(M.shape)\n", "Ndiv = np.empty(N.shape)\n", "\n", "for j in range(0, My, divsize):\n", " for k in range(0, Mx, divsize):\n", " Mdiv = MBAK[j : j+divsize, k : k+divsize]\n", " Ndiv = dct_2d(Mdiv)\n", " NBAK[j : j+divsize, k : k+divsize] = Ndiv\n", "\n", "NBAKlog = np.log(1 + abs(NBAK))\n", "\n", "plt.imshow(NBAKlog / np.amax(Nlog), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Figura adicional: detalhe da roda gigante superior na DCT em blocos de 8x8 pixels" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.imshow(np.log(1+abs(NBAK[0:400,300:600]))/np.amax(Nlog), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.13A: DCT em blocos reagrupados por frequência" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Reordena a DCT em blocos para juntar os coeficientes correspondentes a uma mesma frequência (j,k)\n", "NOVO = NBAK\n", "Ny,Nx = NOVO.shape\n", "by = Ny // divsize\n", "bx = Nx // divsize\n", "\n", "for j in range(divsize):\n", " for k in range(divsize):\n", " NOVO[ j*by : (j+1)*by , k*bx : (k+1)*bx ] = NBAK[j:Ny:divsize, k:Nx:divsize]\n", "\n", "NOVOlog = np.log(1 + abs(NOVO))\n", "\n", "plt.imshow(NOVOlog / np.amax(Nlog), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Figura 3.13B: Coeficientes DC (de todos os blocos da imagem)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Detalhe da parte correspondente aos coeficientes DC de cada bloco\n", "plt.imshow(np.log(1 + abs( NOVO[0:Ny//divsize, 0:Nx//divsize] )) / np.amax(Nlog), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exemplo 3.1: Quantização JPEG em blocos 8x8" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Matriz de quantização usada em cada bloco da DCT durante a compressão JPG" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "E = np.array( [ [ 16, 11, 10, 16, 24, 40, 51, 61 ],\n", " [ 12, 12, 14, 19, 26, 58, 60, 55 ],\n", " [ 14, 13, 16, 24, 40, 57, 69, 56 ],\n", " [ 14, 17, 22, 29, 51, 87, 80, 62 ],\n", " [ 18, 22, 37, 56, 68, 109, 103, 77 ],\n", " [ 24, 35, 55, 64, 81, 104, 113, 92 ],\n", " [ 49, 64, 78, 87, 103, 121, 120, 101 ],\n", " [ 72, 92, 95, 98, 112, 100, 103, 99 ] ] )\n", "\n", "plt.imshow(E / np.amax(E), cmap='gray', interpolation='None')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Figura 3.14: Bloco B de exemplo para quantização" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "B = np.array( \n", " [ [ 47, 32, 75, 148, 192, 215, 216, 207 ],\n", " [ 36, 82, 161, 196, 205, 207, 190, 140 ],\n", " [ 86, 154, 200, 203, 213, 181, 143, 82 ],\n", " [ 154, 202, 209, 203, 159, 145, 147, 127 ],\n", " [ 184, 207, 199, 147, 134, 127, 137, 138 ],\n", " [ 205, 203, 125, 72, 123, 129, 150, 115 ],\n", " [ 209, 167, 126, 107, 111, 94, 105, 107 ],\n", " [ 191, 129, 126, 136, 106, 54, 99, 165 ] ] )\n", "\n", "plt.imshow(B / np.amax(B), cmap='gray', interpolation='None')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## $\\hat{B}=\\mbox{DCT}(B)$ \n", "#### usando valores centralizados (entre -127 e +128)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Bhat = dct_2d(B-127)\n", "print(np.round(Bhat,0))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bloco com coeficientes DCT quantizados $q(\\hat{B})$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Bquant = np.round(Bhat/(E))\n", "print(Bquant)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DCT $\\tilde{B}$ reconstruída a partir dos coeficientes quantizados" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Btil = Bquant*(E);\n", "print(Btil)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# calcula a IDCT 2D fazendo a IDCT 1D das linhas e das colunas\n", "def idct_2d(m):\n", " D1 = spfft.idct(m.T, norm='ortho')\n", " D2 = spfft.idct(D1.T, norm='ortho')\n", " return D2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bloco JPEG reconstruído a partir da DCT quantizada" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Bn = np.round(idct_2d(Btil)+127)\n", "print(Bn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compara bloco $B$ original e o bloco reconstruído $\\mbox{IDCT}(\\tilde{B})$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.imshow(B / np.amax(B), cmap='gray', interpolation='None')\n", "plt.show()\n", "plt.imshow(Bn / np.amax(Bn), cmap='gray', interpolation='None')\n", "plt.show()\n", "# Calcula a distorção da imagem codificada em relação à original\n", "print(\"Distorção = {0}%\\n\".format(100*np.linalg.norm(B-Bn)/np.linalg.norm(B)))\n", "# Estimador da compressão a partir do número de zeros da DCT\n", "print(\"Compressão = {0}%\\n\".format(100*np.sum(np.sum(Bquant==0))/64))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Os valores de R abaixo correspondem ao fator de escala associado\n", "# à matriz de quantização. Quanto maior este fator, maior a distância\n", "# entre os níveis de quantização, e consequentemente serão maiores\n", "# tanto a compressão quanto a perda (ou ruído de quantização).\n", "R = [ 0.1, 0.5, 1, 2 ]\n", "for k in range(4):\n", " # Calcula valores quantizados\n", " Bquant = np.round(Bhat/(R[k]*E))\n", " # Retorna à escala original (este é o passo da de(s)quantização)\n", " Btil = Bquant*(R[k]*E)\n", " # Aplica a DCT, voltando os valores à faixa 0...255\n", " Bn = np.round(idct_2d(Btil)+127)\n", " plt.imshow(Bn/np.amax(Bn), cmap='gray', interpolation='None')\n", " plt.show()\n", " # Calcula a distorção da imagem codificada em relação à original\n", " print(\"Distorção(r={0}) = {1}%\\n\".format(R[k],100*np.linalg.norm(B-Bn)/np.linalg.norm(B)))\n", " # Estimador da compressão a partir do número de zeros da DCT\n", " print(\"Compressão(r={0}) = {1}%\\n\".format(R[k],100*np.sum(np.sum(Bquant==0))/64))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Funções auxiliares: DCT e IDCT em blocos 8x8" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Usage: Y = blkdct2(y, m, n).\n", "# Block DCT routine on array y, using m by n blocks. m must divide\n", "# number of rows in y, n divide number of colums. Transform returned in \n", "# Y.\n", "\n", "def blkdct2(y, m, n):\n", " my,ny = y.shape\n", " Y = 0*y\n", " for j in range(0, my, m):\n", " for k in range(0, ny, n):\n", " Z = y[j : j+m, k : k+n]\n", " Y[j : j+m, k : k+n] = dct_2d(Z)\n", " return Y\n", "\n", "def blkidct2(y, m, n):\n", " my,ny = y.shape\n", " Y = 0*y\n", " for j in range(0, my, m):\n", " for k in range(0, ny, n):\n", " Z = y[j : j+m, k : k+n]\n", " Y[j : j+m, k : k+n] = idct_2d(Z)\n", " return Y" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exemplo da Figura 3.16, usando a imagem da roda gigante" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "My, Mx = M.shape\n", "divsize = 8\n", "new_My = My - My % divsize\n", "new_Mx = Mx - Mx % divsize\n", "M=M[:new_My, :new_Mx]\n", "\n", "My,Mx = M.shape\n", "plt.imshow(M, cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DCT em blocos 8x8" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Calcula a DCT em blocos de 8x8, e mostra o resultado\n", "N=blkdct2(M,8,8)\n", "\n", "Nlog = np.log(1 + abs(N))\n", "plt.imshow(Nlog / np.amax(Nlog), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Codifica cada bloco 8x8 usando o esquema de quantização/dequantização JPEG" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Nquant = N.copy()\n", "Ntil = N.copy()\n", "# Parâmetro de granularidade da quantização\n", "# (faz pouca diferença nesse exemplo; experimente R=0.1)\n", "R=1\n", "# Processa todos os blocos\n", "for j in range(0, My, divsize):\n", " for k in range(0, Mx, divsize):\n", " Nquant[j:j+divsize,k:k+divsize] = np.round(N[j:j+divsize,k:k+divsize]/(R*E))\n", " Ntil[j:j+divsize,k:k+divsize] = (R*E)*Nquant[j:j+divsize,k:k+divsize]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Faz a iDCT em blocos, restaurando a imagem original" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Mn = np.round(blkidct2(Ntil,8,8))\n", "plt.imshow(Mn, cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mede níveis de distorção e compressão (estimada)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"Distorção(r={0}) = {1}%\\n\".format(R,100*np.linalg.norm(M-Mn)/np.linalg.norm(M)))\n", "print(\"Compressão(r={0}) = {1}%\\n\".format(R,100*np.sum(Nquant==0)/(My*Mx)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mostra imagem \"diferença\" e imagem diferença normalizada" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.imshow(abs(M-Mn), cmap='gray')\n", "plt.show()\n", "\n", "plt.imshow((abs(M-Mn)/np.amax(M-Mn)), cmap='gray')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exemplo 3.2 e figura 3.17\n", "## Reconstrução progressiva usando coeficientes DCT até j+k=l (para l=0...14)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Nl=0*Ntil\n", "for l in range(15):\n", " for j in range(l+1):\n", " k = l-j\n", " Nl[j:My:divsize,k:Mx:divsize] = Ntil[j:My:divsize,k:Mx:divsize]\n", " Ml = np.round(blkidct2(Nl,divsize,divsize))\n", " plt.imshow(Ml, cmap='gray')\n", " plt.title(\"Reconstrução progressiva até j+k={0}, distorção introduzida={1}%\".format(l,100*np.linalg.norm(Mn-Ml)/np.linalg.norm(M)))\n", " plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }