Module julius.fftconv

Implementation of a FFT based 1D convolution in PyTorch. While FFT is used in CUDNN for small kernel sizes, it is not the case for long ones, e.g. 512. This module implements efficient FFT based convolutions for such convolutions. A typical application is for evaluationg FIR filters with a long receptive field, typically evaluated with a stride of 1.

Expand source code Browse git
# File under the MIT license, see https://github.com/adefossez/julius/LICENSE for details.
# Author: adefossez, 2020

"""
Implementation of a FFT based 1D convolution in PyTorch.
While FFT is used in CUDNN for small kernel sizes, it is not the case for long ones, e.g. 512.
This module implements efficient FFT based convolutions for such convolutions. A typical
application is for evaluationg FIR filters with a long receptive field, typically
evaluated with a stride of 1.
"""
from typing import Optional

import torch
try:
    import torch.fft as new_fft
except ImportError:
    new_fft = None  # type: ignore
from torch.nn import functional as F

from .core import pad_to, unfold
from .utils import simple_repr


# This is quite verbose, but sadly needed to make TorchScript happy.
def _new_rfft(x: torch.Tensor):
    z = new_fft.rfft(x, dim=-1)
    return torch.view_as_real(z)


def _old_rfft(x: torch.Tensor):
    return torch.rfft(x, 1)  # type: ignore


def _old_irfft(x: torch.Tensor, length: int):
    result = torch.irfft(x, 1, signal_sizes=(length,))  # type: ignore
    return result


def _new_irfft(x: torch.Tensor, length: int):
    x = torch.view_as_complex(x)
    return new_fft.irfft(x, length, dim=-1)


if new_fft is None:
    _rfft = _old_rfft
    _irfft = _old_irfft
else:
    _rfft = _new_rfft
    _irfft = _new_irfft


def _compl_mul_conjugate(a: torch.Tensor, b: torch.Tensor):
    """
    Given a and b two tensors of dimension 4
    with the last dimension being the real and imaginary part,
    returns a multiplied by the conjugate of b, the multiplication
    being with respect to the second dimension.

    """
    # PyTorch 1.7 supports complex number, but not for all operations.
    # Once the support is widespread, this can likely go away.

    op = "bcft,dct->bdft"
    return torch.stack([
        torch.einsum(op, a[..., 0], b[..., 0]) + torch.einsum(op, a[..., 1], b[..., 1]),
        torch.einsum(op, a[..., 1], b[..., 0]) - torch.einsum(op, a[..., 0], b[..., 1])
    ],
                       dim=-1)


def fft_conv1d(
        input: torch.Tensor, weight: torch.Tensor,
        bias: Optional[torch.Tensor] = None, stride: int = 1, padding: int = 0,
        block_ratio: float = 5):
    """
    Same as `torch.nn.functional.conv1d` but using FFT for the convolution.
    Please check PyTorch documentation for more information.

    Args:
        input (Tensor): input signal of shape `[B, C, T]`.
        weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number
            of output channels.
        bias (Tensor or None): if not None, bias term for the convolution.
        stride (int): stride of convolution.
        padding (int): padding to apply to the input.
        block_ratio (float): can be tuned for speed. The input is splitted in chunks
            with a size of `int(block_ratio * kernel_size)`.

    Shape:

        - Inputs: `input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`.
        - Output: `(*, T)`


    ..note::
        This function is faster than `torch.nn.functional.conv1d` only in specific cases.
        Typically, the kernel size should be of the order of 256 to see any real gain,
        for a stride of 1.

    ..Warning::
        Dilation and groups are not supported at the moment. This function might use
        more memory than the default Conv1d implementation.
    """
    input = F.pad(input, (padding, padding))
    batch, channels, length = input.shape
    out_channels, _, kernel_size = weight.shape

    if length < kernel_size:
        raise RuntimeError(f"Input should be at least as large as the kernel size {kernel_size}, "
                           f"but it is only {length} samples long.")
    if block_ratio < 1:
        raise RuntimeError("Block ratio must be greater than 1.")

    # We are going to process the input blocks by blocks, as for some reason it is faster
    # and less memory intensive (I think the culprit is `torch.einsum`.
    block_size: int = min(int(kernel_size * block_ratio), length)
    fold_stride = block_size - kernel_size + 1
    weight = pad_to(weight, block_size)
    weight_z = _rfft(weight)

    # We pad the input and get the different frames, on which
    frames = unfold(input, block_size, fold_stride)

    frames_z = _rfft(frames)
    out_z = _compl_mul_conjugate(frames_z, weight_z)
    out = _irfft(out_z, block_size)
    # The last bit is invalid, because FFT will do a circular convolution.
    out = out[..., :-kernel_size + 1]
    out = out.reshape(batch, out_channels, -1)
    out = out[..., ::stride]
    target_length = (length - kernel_size) // stride + 1
    out = out[..., :target_length]
    if bias is not None:
        out += bias[:, None]
    return out


class FFTConv1d(torch.nn.Module):
    """
    Same as `torch.nn.Conv1d` but based on `fft_conv1d`.
    Please check PyTorch documentation for more information.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int): kernel size of convolution.
        stride (int): stride of convolution.
        padding (int): padding to apply to the input.
        bias (bool): if True, use a bias term.

    ..note::
        This module is faster than `torch.nn.Conv1d` only in specific cases.
        Typically, `kernel_size` should be of the order of 256 to see any real gain,
        for a stride of 1.

    ..warning::
        Dilation and groups are not supported at the moment. This module might use
        more memory than the default Conv1d implementation.

    >>> fftconv = FFTConv1d(12, 24, 128, 4)
    >>> x = torch.randn(4, 12, 1024)
    >>> print(list(fftconv(x).shape))
    [4, 24, 225]
    """
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int,
                 stride: int = 1, padding: int = 0, bias: bool = True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        conv = torch.nn.Conv1d(in_channels, out_channels, kernel_size, bias=bias)
        self.weight = conv.weight
        self.bias = conv.bias

    def forward(self, input: torch.Tensor):
        return fft_conv1d(
            input, self.weight, self.bias, self.stride, self.padding)

    def __repr__(self):
        return simple_repr(self, overrides={"bias": self.bias is not None})

Functions

def fft_conv1d(input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: int = 1, padding: int = 0, block_ratio: float = 5)

Same as torch.nn.functional.conv1d but using FFT for the convolution. Please check PyTorch documentation for more information.

Args

input : Tensor
input signal of shape [B, C, T].
weight : Tensor
weight of the convolution [D, C, K] with D the number of output channels.
bias : Tensor or None
if not None, bias term for the convolution.
stride : int
stride of convolution.
padding : int
padding to apply to the input.
block_ratio : float
can be tuned for speed. The input is splitted in chunks with a size of int(block_ratio * kernel_size).

Shape

  • Inputs: input is [B, C, T], weight is [D, C, K] and bias is [D].
  • Output: (*, T)

Note

This function is faster than torch.nn.functional.conv1d only in specific cases. Typically, the kernel size should be of the order of 256 to see any real gain, for a stride of 1.

Warning

Dilation and groups are not supported at the moment. This function might use more memory than the default Conv1d implementation.

Expand source code Browse git
def fft_conv1d(
        input: torch.Tensor, weight: torch.Tensor,
        bias: Optional[torch.Tensor] = None, stride: int = 1, padding: int = 0,
        block_ratio: float = 5):
    """
    Same as `torch.nn.functional.conv1d` but using FFT for the convolution.
    Please check PyTorch documentation for more information.

    Args:
        input (Tensor): input signal of shape `[B, C, T]`.
        weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number
            of output channels.
        bias (Tensor or None): if not None, bias term for the convolution.
        stride (int): stride of convolution.
        padding (int): padding to apply to the input.
        block_ratio (float): can be tuned for speed. The input is splitted in chunks
            with a size of `int(block_ratio * kernel_size)`.

    Shape:

        - Inputs: `input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`.
        - Output: `(*, T)`


    ..note::
        This function is faster than `torch.nn.functional.conv1d` only in specific cases.
        Typically, the kernel size should be of the order of 256 to see any real gain,
        for a stride of 1.

    ..Warning::
        Dilation and groups are not supported at the moment. This function might use
        more memory than the default Conv1d implementation.
    """
    input = F.pad(input, (padding, padding))
    batch, channels, length = input.shape
    out_channels, _, kernel_size = weight.shape

    if length < kernel_size:
        raise RuntimeError(f"Input should be at least as large as the kernel size {kernel_size}, "
                           f"but it is only {length} samples long.")
    if block_ratio < 1:
        raise RuntimeError("Block ratio must be greater than 1.")

    # We are going to process the input blocks by blocks, as for some reason it is faster
    # and less memory intensive (I think the culprit is `torch.einsum`.
    block_size: int = min(int(kernel_size * block_ratio), length)
    fold_stride = block_size - kernel_size + 1
    weight = pad_to(weight, block_size)
    weight_z = _rfft(weight)

    # We pad the input and get the different frames, on which
    frames = unfold(input, block_size, fold_stride)

    frames_z = _rfft(frames)
    out_z = _compl_mul_conjugate(frames_z, weight_z)
    out = _irfft(out_z, block_size)
    # The last bit is invalid, because FFT will do a circular convolution.
    out = out[..., :-kernel_size + 1]
    out = out.reshape(batch, out_channels, -1)
    out = out[..., ::stride]
    target_length = (length - kernel_size) // stride + 1
    out = out[..., :target_length]
    if bias is not None:
        out += bias[:, None]
    return out

Classes

class FFTConv1d (in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0, bias: bool = True)

Same as torch.nn.Conv1d but based on fft_conv1d(). Please check PyTorch documentation for more information.

Args

in_channels : int
number of input channels.
out_channels : int
number of output channels.
kernel_size : int
kernel size of convolution.
stride : int
stride of convolution.
padding : int
padding to apply to the input.
bias : bool
if True, use a bias term.

Note

This module is faster than torch.nn.Conv1d only in specific cases. Typically, kernel_size should be of the order of 256 to see any real gain, for a stride of 1.

Warning

Dilation and groups are not supported at the moment. This module might use more memory than the default Conv1d implementation.

>>> fftconv = FFTConv1d(12, 24, 128, 4)
>>> x = torch.randn(4, 12, 1024)
>>> print(list(fftconv(x).shape))
[4, 24, 225]

Initializes internal Module state, shared by both nn.Module and ScriptModule.

Expand source code Browse git
class FFTConv1d(torch.nn.Module):
    """
    Same as `torch.nn.Conv1d` but based on `fft_conv1d`.
    Please check PyTorch documentation for more information.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int): kernel size of convolution.
        stride (int): stride of convolution.
        padding (int): padding to apply to the input.
        bias (bool): if True, use a bias term.

    ..note::
        This module is faster than `torch.nn.Conv1d` only in specific cases.
        Typically, `kernel_size` should be of the order of 256 to see any real gain,
        for a stride of 1.

    ..warning::
        Dilation and groups are not supported at the moment. This module might use
        more memory than the default Conv1d implementation.

    >>> fftconv = FFTConv1d(12, 24, 128, 4)
    >>> x = torch.randn(4, 12, 1024)
    >>> print(list(fftconv(x).shape))
    [4, 24, 225]
    """
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int,
                 stride: int = 1, padding: int = 0, bias: bool = True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        conv = torch.nn.Conv1d(in_channels, out_channels, kernel_size, bias=bias)
        self.weight = conv.weight
        self.bias = conv.bias

    def forward(self, input: torch.Tensor):
        return fft_conv1d(
            input, self.weight, self.bias, self.stride, self.padding)

    def __repr__(self):
        return simple_repr(self, overrides={"bias": self.bias is not None})

Ancestors

  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, input: torch.Tensor) ‑> Callable[..., Any]

Defines the computation performed at every call.

Should be overridden by all subclasses.

Note

Although the recipe for forward pass needs to be defined within this function, one should call the :class:Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

Expand source code Browse git
def forward(self, input: torch.Tensor):
    return fft_conv1d(
        input, self.weight, self.bias, self.stride, self.padding)