线性分类-高斯判别分析

定义

$$
\begin{aligned}
&\lbrace\left(x_{i}, y_{i}\right)\rbrace_{i=1}^{N}\\
&x_{i} \in \mathbb{R}^{p}, \quad y_{i} \in \lbrace 0,1\rbrace
\end{aligned}
$$

$$
\begin{aligned}
&C_{1}=\lbrace x_{i} | y_{i}=1, i=1, \cdots, N\rbrace \rightarrow S_{1}\\
&C_{2}=\lbrace x_{i} | y_{i}=0, i=1, \cdots, N\rbrace \rightarrow S_{2}\\
&\left|C_{1}\right|=N_{1}, \quad\left|C_{2}\right|=N_{2}, \quad N_{1}+N_{2}=N
\end{aligned}
$$

$$ p(y_i| x_i)\propto p(x_i|y_i) p(y_i) = p(x_i, y_i) $$

$$ y_i\sim Bernoulli(\phi)\Rightarrow p(y_i)=\phi^{y_i} (1-\phi)^{1-y_i}$$

$$
\begin{cases} \begin{array}{l}
{x_i | y_i=1 \sim N\left(\mu_{1}, \Sigma\right)} \\
{x_i | y_i=0 \sim N\left(\mu_{2}, \Sigma\right)}
\end{array} \end{cases} \Rightarrow N(\mu_1, \Sigma)^{y_i} \cdot N\left(\mu_{2}, \Sigma\right)^{1-y_i}
$$

对数似然:
$$
\begin{aligned}
\mathbb{L}(\theta) &=\log \prod_{i=1}^N p\left(x_i, y_i\right) \\
&=\sum_{i=1}^N \left[\underbrace{ \log N\left(\mu_1, \Sigma\right)^{y_i}}_{(1)}+\underbrace{\log N(\mu_2, \Sigma)^{1-y_i}}_{(2)}+\underbrace{\log \phi^{y_i}(1-\phi)^{1-y_i}}_{(3)}\right]\\
\hat{\theta} &=\arg\max _{\theta}\mathbb{L}(\theta) \\
\theta &=\left(\mu_1, \mu_2, \Sigma, \phi\right)
\end{aligned}
$$

求解

求$\phi$: $(3)=\sum_{i=1}^{N} y_{i} \log \phi+\left(1-y_{i}\right) \log (1-\phi)$
对$(3)$求导,并令其为$0$:
$$
\begin{aligned}
\frac{\partial(3)}{\partial \phi} &=\sum_{i=1}^{N} y_{i} \frac{1}{\phi}+\left(1-y_{i}\right) \frac{1}{1-\phi}(-1) \\
&=\sum_{i=1}^{N} y_{i} \cdot \frac{1}{\phi}-\left(1-y_{i}\right) \frac{1}{1-\phi} \\
&=0
\end{aligned}
$$

$$\hat{\phi}=\frac{1}{N} \sum_{i=1}^{N} y_{i}=\frac{N_{1}}{N}$$

求$\mu_1$
$$
\begin{aligned}
(1) &=\sum_{i=1}^{N} \log N\left(\mu_{1}, \Sigma\right)^{y_{i}} \\
&=\sum_{i=1}^{N} y_{i} \log \left[\frac{1}{(2 \pi)^{\frac{p}{2}}|\Sigma|^{\frac{1}{2}}} \exp \left(-\frac{1}{2}\left(x_{i}-\mu_1\right)^{\top} \Sigma^{-1}\left(x_{i}-\mu_1\right)\right)\right]
\end{aligned}
$$

$$
\hat{\mu}_1 =\arg\max_{\mu_1} (1)=\arg \max_{\mu_1} \sum_{i=1}^{N} y_{i}\left(-\frac{1}{2}\left(x_{i}-\mu_{1}\right)^{T} \Sigma^{-1}\left(x_{i}-\mu_{1}\right)\right)
$$

$$
\begin{aligned}
&\Delta=\sum_{i=1}^{N} y_{i}\left(-\frac{1}{2}\left(x_{i}-\mu_1\right)^{T} \Sigma^{-1}\left(x_{i}-\mu_{1}\right)\right)\\
&\begin{array}{l}
{=-\frac{1}{2} \sum_{i=1}^{N} y_{i}\left(x_{i}^{T} \Sigma^{-1}-\mu_{1}^{T} \Sigma^{-1}\right)\left(x_{i}-\mu_{1}\right)} \\
{=-\frac{1}{2} \sum_{i=1}^{N} y_{i}\left(x_{i}^{T} \Sigma^{-1} x_{i}-2 \mu_{1}^{T} \Sigma^{-1} x_{i}+\mu_{1}^{T} \Sigma^{-1} \mu_{1}\right)}
\end{array}
\end{aligned}
$$

对$\Delta$求导,并令其为$0$:
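$$
\begin{aligned}
\frac{\partial \Delta}{\partial \mu_1} &=-\frac{1}{2} \sum_{i=1}^{N} y_{i}\left(-2 \Sigma^{-1} x_{i}+2 \Sigma^{-1} \mu_{1}\right)=\sum_{i=1}^{N} y_{i} \Sigma^{-1}\left(x_{i}-\mu_{1}\right)=0 \\
\hat{\mu}_1 &=\frac{\sum_{i=1}^{N} y_{i} x_{i}}{\sum_{i=1}^{N} y_{i}}=\frac{\sum_{i=1}^{N} y_{i} x_{i}}{N_1}
\end{aligned}
$$

同理,对$(2)$关于$\mu_2$求导并令其为$0$,可得:

$$\hat{\mu}_2=\frac{\sum_{i=1}^{N}\left(1-y_{i}\right) x_{i}}{N_2}$$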

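求$\Sigma$,$\hat{\Sigma}=\arg\max _{\Sigma} \left[(1) + (2)\right]$,其中$S_1$,$S_2$分别为两类样本的协方差矩阵,例如$S_{1}=\frac{1}{N_{1}} \sum_{x_{i} \in C_{1}}\left(x_{i}-\mu_{1}\right)\left(x_{i}-\mu_{1}\right)^{T}$($S_2$同理)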

$$
\begin{array}{l}
(1) + (2)=\sum_{x_{i} \in C_1} \log N\left(\mu_1,\Sigma\right)+\sum_{x_{i} \in C_{2}} \log N\left(\mu_{2}, \Sigma\right) \\
{=-\frac{1}{2} N_{1} \log |\Sigma|-\frac{1}{2} N_{1} \operatorname{tr}\left(S_{1} \Sigma^{-1}\right)} \\
{\quad-\frac{1}{2} N_{2} \log |\Sigma|-\frac{1}{2} N_{2} \operatorname{tr}\left(S_2 \Sigma^{-1}\right)+C} \\
{=-\frac{1}{2} N \log |\Sigma|-\frac{1}{2} N_{1} \operatorname{tr}\left(S_{1} \Sigma^{-1}\right)-\frac{1}{2} N_{2} \operatorname{tr}\left(S_{2} \Sigma^{-1}\right)+C} \\
{=-\frac{1}{2}\left(N \log |\Sigma|+N_1 \operatorname{tr}\left(S_{1} \Sigma^{-1}\right)+N_{2} \operatorname{tr}\left(S_{2} \Sigma^{-1}\right)\right)+C}
\end{array}
$$

$$\begin{aligned}
\frac{\partial \left[(1)+(2)\right]}{\partial \Sigma}
&=-\frac{1}{2}\left(N \Sigma^{-1}-N_{1} \Sigma^{-1} S_{1} \Sigma^{-1}-N_{2} \Sigma^{-1} S_{2} \Sigma^{-1}\right) \\
&= 0
\end{aligned}
$$

$$\hat{\Sigma}=\frac{1}{N}\left(N_{1} S_{1}+N_{2} S_{2}\right)$$

非常感谢各位老板投喂!