From ef2ca0b625c533a470f5c264f95762fbdc299095 Mon Sep 17 00:00:00 2001
From: Phil Wang
Date: Sun, 4 Oct 2020 21:53:53 -0700
Subject: [PATCH] new paper suggests image linear attention is more effective without query normalization

---
 denoising_diffusion_pytorch/denoising_diffusion_pytorch.py | 1 -
 setup.py                                                   | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
index bd0b46d71..072702060 100644
--- a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
+++ b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
@@ -169,7 +169,6 @@ def forward(self, x):
         b, c, h, w = x.shape
         qkv = self.to_qkv(x)
         q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads = self.heads, qkv=3)
-        q = q.softmax(dim=-2)
         k = k.softmax(dim=-1)
         context = torch.einsum('bhdn,bhen->bhde', k, v)
         out = torch.einsum('bhde,bhdn->bhen', context, q)
diff --git a/setup.py b/setup.py
index 202ea4cb3..31f00ba0e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.3.2',
+  version = '0.4.0',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',