about summary refs log tree commit diff
path: root/users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-01-30T09·43+0200
committerflokli <flokli@flokli.de>2024-01-30T09·54+0000
commitaf9a8d372b24710bf7fc27c8e81244e1ca6d1658 (patch)
treedf8bee03b202ec5081b9bd7e7b5d7b6ecdd28e1b /users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch
parentb38be028d96ce107439f3323026270228a871a13 (diff)
feat(users/flokli/ipu6-softisp): init r/7454
This code adds support for the ipu6 webcams via libcamera, based on the work in
https://copr.fedorainfracloud.org/coprs/jwrdegoede/ipu6-softisp/.

It's supposed to be included in your NixOS configuration imports.

Change-Id: Ifb71999ad61161fa23506b97cb449f73fb1270e3
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10709
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Autosubmit: flokli <flokli@flokli.de>
Diffstat (limited to 'users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch')
-rw-r--r--users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch727
1 files changed, 727 insertions, 0 deletions
diff --git a/users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch b/users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch
new file mode 100644
index 000000000000..56bc87336c2e
--- /dev/null
+++ b/users/flokli/ipu6-softisp/libcamera/0010-libcamera-software_isp-Add-DebayerCpu-class.patch
@@ -0,0 +1,727 @@
+From 7eb7164ed7d90ea4cf9ec7e4f35fa8efa25f35e9 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 11 Dec 2023 17:00:17 +0100
+Subject: [PATCH 10/25] libcamera: software_isp: Add DebayerCpu class
+
+Add CPU based debayering implementation. This initial implementation
+only supports debayering packed 10 bits per pixel bayer data in
+the 4 standard bayer orders.
+
+Doxygen documentation by Dennis Bonke.
+
+Co-authored-by: Dennis Bonke <admin@dennisbonke.com>
+Signed-off-by: Dennis Bonke <admin@dennisbonke.com>
+Co-authored-by: Andrey Konovalov <andrey.konovalov@linaro.org>
+Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>
+Co-authored-by: Pavel Machek <pavel@ucw.cz>
+Signed-off-by: Pavel Machek <pavel@ucw.cz>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s
+Tested-by: Pavel Machek <pavel@ucw.cz>
+---
+ .../internal/software_isp/debayer_cpu.h       | 131 +++++
+ .../internal/software_isp/meson.build         |   1 +
+ src/libcamera/software_isp/debayer_cpu.cpp    | 528 ++++++++++++++++++
+ src/libcamera/software_isp/meson.build        |   1 +
+ 4 files changed, 661 insertions(+)
+ create mode 100644 include/libcamera/internal/software_isp/debayer_cpu.h
+ create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp
+
+diff --git a/include/libcamera/internal/software_isp/debayer_cpu.h b/include/libcamera/internal/software_isp/debayer_cpu.h
+new file mode 100644
+index 00000000..78573f44
+--- /dev/null
++++ b/include/libcamera/internal/software_isp/debayer_cpu.h
+@@ -0,0 +1,131 @@
++/* SPDX-License-Identifier: LGPL-2.1-or-later */
++/*
++ * Copyright (C) 2023, Linaro Ltd
++ * Copyright (C) 2023, Red Hat Inc.
++ *
++ * Authors:
++ * Hans de Goede <hdegoede@redhat.com> 
++ *
++ * debayer_cpu.h - CPU based debayering header
++ */
++
++#pragma once
++
++#include <memory>
++#include <stdint.h>
++#include <vector>
++
++#include <libcamera/base/object.h>
++
++#include "libcamera/internal/software_isp/swstats_cpu.h"
++#include "libcamera/internal/software_isp/debayer.h"
++
++namespace libcamera {
++
++/**
++ * \class DebayerCpu
++ * \brief Class for debayering on the CPU
++ *
++ * Implementation for CPU based debayering
++ */
++class DebayerCpu : public Debayer, public Object
++{
++public:
++	/*
++	  * FIXME this should be a plain (implementation independent)  SwStats
++	  * this can be fixed once getStats() is dropped.
++	  */
++	/**
++	 * \brief Constructs a DebayerCpu object.
++	 * \param[in] stats Pointer to the stats object to use.
++	 */
++	DebayerCpu(std::unique_ptr<SwStatsCpu> stats);
++	~DebayerCpu();
++
++	/*
++	 * Setup the Debayer object according to the passed in parameters.
++	 * Return 0 on success, a negative errno value on failure
++	 * (unsupported parameters).
++	 */
++	int configure(const StreamConfiguration &inputCfg,
++		      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);
++
++	/*
++	 * Get width and height at which the bayer-pattern repeats.
++	 * Return pattern-size or an empty Size for an unsupported inputFormat.
++	 */
++	Size patternSize(PixelFormat inputFormat);
++
++	std::vector<PixelFormat> formats(PixelFormat input);
++	std::tuple<unsigned int, unsigned int>
++		strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
++
++	void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);
++
++	/**
++	 * \brief Get the file descriptor for the statistics.
++	 *
++	 * \return the file descriptor pointing to the statistics.
++	 */
++	const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
++
++	/**
++	 * \brief Get the output frame size.
++	 *
++	 * \return The output frame size.
++	 */
++	unsigned int frameSize() { return outputConfig_.frameSize; }
++private:
++	void initLinePointers(const uint8_t *linePointers[], const uint8_t *src);
++	void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);
++	void process2(const uint8_t *src, uint8_t *dst);
++	void process4(const uint8_t *src, uint8_t *dst);
++	/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
++	void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
++	void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
++	void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
++	void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
++
++	typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]);
++
++	struct DebayerInputConfig {
++		Size patternSize;
++		unsigned int bpp; /* Memory used per pixel, not precision */
++		unsigned int stride;
++		std::vector<PixelFormat> outputFormats;
++	};
++
++	struct DebayerOutputConfig {
++		unsigned int bpp; /* Memory used per pixel, not precision */
++		unsigned int stride;
++		unsigned int frameSize;
++	};
++
++	int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
++	int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
++	int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);
++
++	uint8_t gamma_[1024];
++	uint8_t red_[256];
++	uint8_t green_[256];
++	uint8_t blue_[256];
++	debayerFn debayer0_;
++	debayerFn debayer1_;
++	debayerFn debayer2_;
++	debayerFn debayer3_;
++	Rectangle window_;
++	DebayerInputConfig inputConfig_;
++	DebayerOutputConfig outputConfig_;
++	std::unique_ptr<SwStatsCpu> stats_;
++	uint8_t *lineBuffers_[5];
++	unsigned int lineBufferIndex_;
++	bool enableInputMemcpy_;
++	float gamma_correction_;
++	int measuredFrames_;
++	int64_t frameProcessTime_;
++	/* Skip 30 frames for things to stabilize then measure 30 frames */
++	static const int framesToSkip = 30;
++	static const int framesToMeasure = 60;
++};
++
++} /* namespace libcamera */
+diff --git a/include/libcamera/internal/software_isp/meson.build b/include/libcamera/internal/software_isp/meson.build
+index 7e40925e..b5a0d737 100644
+--- a/include/libcamera/internal/software_isp/meson.build
++++ b/include/libcamera/internal/software_isp/meson.build
+@@ -2,6 +2,7 @@
+ 
+ libcamera_internal_headers += files([
+     'debayer.h',
++    'debayer_cpu.h',
+     'debayer_params.h',
+     'swisp_stats.h',
+     'swstats.h',
+diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
+new file mode 100644
+index 00000000..e0c3c658
+--- /dev/null
++++ b/src/libcamera/software_isp/debayer_cpu.cpp
+@@ -0,0 +1,528 @@
++/* SPDX-License-Identifier: LGPL-2.1-or-later */
++/*
++ * Copyright (C) 2023, Linaro Ltd
++ * Copyright (C) 2023, Red Hat Inc.
++ *
++ * Authors:
++ * Hans de Goede <hdegoede@redhat.com> 
++ *
++ * debayer_cpu.cpp - CPU based debayering class
++ */
++
++#include "libcamera/internal/software_isp/debayer_cpu.h"
++
++#include <math.h>
++#include <stdlib.h>
++#include <time.h>
++
++#include <libcamera/formats.h>
++
++#include "libcamera/internal/bayer_format.h"
++#include "libcamera/internal/framebuffer.h"
++#include "libcamera/internal/mapped_framebuffer.h"
++
++namespace libcamera {
++
++DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)
++	: stats_(std::move(stats)), gamma_correction_(1.0)
++{
++#ifdef __x86_64__
++	enableInputMemcpy_ = false;
++#else
++	enableInputMemcpy_ = true;
++#endif
++	/* Initialize gamma to 1.0 curve */
++	for (int i = 0; i < 1024; i++)
++		gamma_[i] = i / 4;
++
++	for (int i = 0; i < 5; i++)
++		lineBuffers_[i] = NULL;
++}
++
++DebayerCpu::~DebayerCpu()
++{
++	for (int i = 0; i < 5; i++)
++		free(lineBuffers_[i]);
++}
++
++// RGR
++// GBG
++// RGR
++#define BGGR_BGR888(p, n, div)                                                                \
++	*dst++ = blue_[curr[x] / (div)];                                                      \
++	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
++	*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
++	x++;
++
++// GBG
++// RGR
++// GBG
++#define GRBG_BGR888(p, n, div)                                    \
++	*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
++	*dst++ = green_[curr[x] / (div)];                         \
++	*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
++	x++;
++
++// GRG
++// BGB
++// GRG
++#define GBRG_BGR888(p, n, div)                                     \
++	*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
++	*dst++ = green_[curr[x] / (div)];                          \
++	*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
++	x++;
++
++// BGB
++// GRG
++// BGB
++#define RGGB_BGR888(p, n, div)                                                                 \
++	*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
++	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
++	*dst++ = red_[curr[x] / (div)];                                                        \
++	x++;
++
++void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
++{
++	const int width_in_bytes = window_.width * 5 / 4;
++	const uint8_t *prev = (const uint8_t *)src[0];
++	const uint8_t *curr = (const uint8_t *)src[1];
++	const uint8_t *next = (const uint8_t *)src[2];
++
++	/*
++	 * For the first pixel getting a pixel from the previous column uses
++	 * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.
++	 * Same for last pixel (uses x + 2) and looking at the next column.
++	 * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed.
++	 */
++	for (int x = 0; x < width_in_bytes; x++) {
++		/* Even pixel */
++		BGGR_BGR888(2, 1, 1)
++		/* Odd pixel BGGR -> GBRG */
++		GBRG_BGR888(1, 1, 1)
++		/* Same thing for next 2 pixels */
++		BGGR_BGR888(1, 1, 1)
++		GBRG_BGR888(1, 2, 1)
++	}
++}
++
++void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
++{
++	const int width_in_bytes = window_.width * 5 / 4;
++	const uint8_t *prev = (const uint8_t *)src[0];
++	const uint8_t *curr = (const uint8_t *)src[1];
++	const uint8_t *next = (const uint8_t *)src[2];
++
++	for (int x = 0; x < width_in_bytes; x++) {
++		/* Even pixel */
++		GRBG_BGR888(2, 1, 1)
++		/* Odd pixel GRBG -> RGGB */
++		RGGB_BGR888(1, 1, 1)
++		/* Same thing for next 2 pixels */
++		GRBG_BGR888(1, 1, 1)
++		RGGB_BGR888(1, 2, 1)
++	}
++}
++
++void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
++{
++	const int width_in_bytes = window_.width * 5 / 4;
++	const uint8_t *prev = (const uint8_t *)src[0];
++	const uint8_t *curr = (const uint8_t *)src[1];
++	const uint8_t *next = (const uint8_t *)src[2];
++
++	for (int x = 0; x < width_in_bytes; x++) {
++		/* Even pixel */
++		GBRG_BGR888(2, 1, 1)
++		/* Odd pixel GBGR -> BGGR */
++		BGGR_BGR888(1, 1, 1)
++		/* Same thing for next 2 pixels */
++		GBRG_BGR888(1, 1, 1)
++		BGGR_BGR888(1, 2, 1)
++	}
++}
++
++void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
++{
++	const int width_in_bytes = window_.width * 5 / 4;
++	const uint8_t *prev = (const uint8_t *)src[0];
++	const uint8_t *curr = (const uint8_t *)src[1];
++	const uint8_t *next = (const uint8_t *)src[2];
++
++	for (int x = 0; x < width_in_bytes; x++) {
++		/* Even pixel */
++		RGGB_BGR888(2, 1, 1)
++		/* Odd pixel RGGB -> GRBG*/
++		GRBG_BGR888(1, 1, 1)
++		/* Same thing for next 2 pixels */
++		RGGB_BGR888(1, 1, 1)
++		GRBG_BGR888(1, 2, 1)
++	}
++}
++
++static bool isStandardBayerOrder(BayerFormat::Order order)
++{
++	return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||
++	       order == BayerFormat::GRBG || order == BayerFormat::RGGB;
++}
++
++int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
++{
++	BayerFormat bayerFormat =
++		BayerFormat::fromPixelFormat(inputFormat);
++
++	if (bayerFormat.bitDepth == 10 &&
++	    bayerFormat.packing == BayerFormat::Packing::CSI2 &&
++	    isStandardBayerOrder(bayerFormat.order)) {
++		config.bpp = 10;
++		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
++		config.patternSize.height = 2;
++		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 });
++		return 0;
++	}
++
++	LOG(Debayer, Info)
++		<< "Unsupported input format " << inputFormat.toString();
++	return -EINVAL;
++}
++
++int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
++{
++	if (outputFormat == formats::RGB888) {
++		config.bpp = 24;
++		return 0;
++	}
++
++	LOG(Debayer, Info)
++		<< "Unsupported output format " << outputFormat.toString();
++	return -EINVAL;
++}
++
++/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */
++int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat)
++{
++	BayerFormat bayerFormat =
++		BayerFormat::fromPixelFormat(inputFormat);
++
++	if (bayerFormat.bitDepth == 10 &&
++	    bayerFormat.packing == BayerFormat::Packing::CSI2) {
++		switch (bayerFormat.order) {
++		case BayerFormat::BGGR:
++			debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
++			debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
++			return 0;
++		case BayerFormat::GBRG:
++			debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
++			debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
++			return 0;
++		case BayerFormat::GRBG:
++			debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
++			debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
++			return 0;
++		case BayerFormat::RGGB:
++			debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
++			debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
++			return 0;
++		default:
++			break;
++		}
++	}
++
++	LOG(Debayer, Error) << "Unsupported input output format combination";
++	return -EINVAL;
++}
++
++int DebayerCpu::configure(const StreamConfiguration &inputCfg,
++			  const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
++{
++	if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
++		return -EINVAL;
++
++	if (stats_->configure(inputCfg) != 0)
++		return -EINVAL;
++
++	const Size &stats_pattern_size = stats_->patternSize();
++	if (inputConfig_.patternSize.width != stats_pattern_size.width ||
++	    inputConfig_.patternSize.height != stats_pattern_size.height) {
++		LOG(Debayer, Error)
++			<< "mismatching stats and debayer pattern sizes for "
++			<< inputCfg.pixelFormat.toString();
++		return -EINVAL;
++	}
++
++	inputConfig_.stride = inputCfg.stride;
++
++	if (outputCfgs.size() != 1) {
++		LOG(Debayer, Error)
++			<< "Unsupported number of output streams: "
++			<< outputCfgs.size();
++		return -EINVAL;
++	}
++
++	const StreamConfiguration &outputCfg = outputCfgs[0];
++	SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
++	std::tie(outputConfig_.stride, outputConfig_.frameSize) =
++		strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
++
++	if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
++		LOG(Debayer, Error)
++			<< "Invalid output size/stride: "
++			<< "\n  " << outputCfg.size << " (" << outSizeRange << ")"
++			<< "\n  " << outputCfg.stride << " (" << outputConfig_.stride << ")";
++		return -EINVAL;
++	}
++
++	if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)
++		return -EINVAL;
++
++	window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
++		    ~(inputConfig_.patternSize.width - 1);
++	window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
++		    ~(inputConfig_.patternSize.height - 1);
++	window_.width = outputCfg.size.width;
++	window_.height = outputCfg.size.height;
++
++	/* Don't pass x,y since process() already adjusts src before passing it */
++	stats_->setWindow(Rectangle(window_.size()));
++
++	for (unsigned int i = 0;
++	     i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;
++	     i++) {
++		/* pad with patternSize.Width on both left and right side */
++		size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) *
++				    inputConfig_.bpp / 8;
++
++		free(lineBuffers_[i]);
++		lineBuffers_[i] = (uint8_t *)malloc(lineLength);
++		if (!lineBuffers_[i])
++			return -ENOMEM;
++	}
++
++	measuredFrames_ = 0;
++	frameProcessTime_ = 0;
++
++	return 0;
++}
++
++Size DebayerCpu::patternSize(PixelFormat inputFormat)
++{
++	DebayerCpu::DebayerInputConfig config;
++
++	if (getInputConfig(inputFormat, config) != 0)
++		return {};
++
++	return config.patternSize;
++}
++
++std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)
++{
++	DebayerCpu::DebayerInputConfig config;
++
++	if (getInputConfig(inputFormat, config) != 0)
++		return std::vector<PixelFormat>();
++
++	return config.outputFormats;
++}
++
++std::tuple<unsigned int, unsigned int>
++DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
++{
++	DebayerCpu::DebayerOutputConfig config;
++
++	if (getOutputConfig(outputFormat, config) != 0)
++		return std::make_tuple(0, 0);
++
++	/* round up to multiple of 8 for 64 bits alignment */
++	unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;
++
++	return std::make_tuple(stride, stride * size.height);
++}
++
++void DebayerCpu::initLinePointers(const uint8_t *linePointers[], const uint8_t *src)
++{
++	const int patternHeight = inputConfig_.patternSize.height;
++
++	for (int i = 0; i < patternHeight; i++)
++		linePointers[i + 1] = src +
++				      (-patternHeight / 2 + i) * (int)inputConfig_.stride;
++
++	if (!enableInputMemcpy_)
++		return;
++
++	for (int i = 0; i < patternHeight; i++) {
++		/* pad with patternSize.Width on both left and right side */
++		size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) *
++				    inputConfig_.bpp / 8;
++		int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8;
++
++		memcpy(lineBuffers_[i], linePointers[i + 1] - padding, lineLength);
++		linePointers[i + 1] = lineBuffers_[i] + padding;
++	}
++
++	/* Point lineBufferIndex_ to first unused lineBuffer */
++	lineBufferIndex_ = patternHeight;
++}
++
++void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)
++{
++	const int patternHeight = inputConfig_.patternSize.height;
++
++	for (int i = 0; i < patternHeight; i++)
++		linePointers[i] = linePointers[i + 1];
++
++	linePointers[patternHeight] = src +
++				      (patternHeight / 2) * (int)inputConfig_.stride;
++
++	if (!enableInputMemcpy_)
++		return;
++
++	size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) *
++			    inputConfig_.bpp / 8;
++	int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8;
++	memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - padding, lineLength);
++	linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + padding;
++
++	lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);
++}
++
++void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)
++{
++	const unsigned int y_end = window_.y + window_.height;
++	const uint8_t *linePointers[3];
++
++	/* Adjust src to top left corner of the window */
++	src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
++
++	initLinePointers(linePointers, src);
++
++	for (unsigned int y = window_.y; y < y_end; y += 2) {
++		shiftLinePointers(linePointers, src);
++		stats_->processLine0(y, linePointers);
++		(this->*debayer0_)(dst, linePointers);
++		src += inputConfig_.stride;
++		dst += outputConfig_.stride;
++
++		shiftLinePointers(linePointers, src);
++		(this->*debayer1_)(dst, linePointers);
++		src += inputConfig_.stride;
++		dst += outputConfig_.stride;
++	}
++}
++
++void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
++{
++	const unsigned int y_end = window_.y + window_.height;
++	const uint8_t *linePointers[5];
++
++	/* Adjust src to top left corner of the window */
++	src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
++
++	initLinePointers(linePointers, src);
++
++	for (unsigned int y = window_.y; y < y_end; y += 4) {
++		shiftLinePointers(linePointers, src);
++		stats_->processLine0(y, linePointers);
++		(this->*debayer0_)(dst, linePointers);
++		src += inputConfig_.stride;
++		dst += outputConfig_.stride;
++
++		shiftLinePointers(linePointers, src);
++		(this->*debayer1_)(dst, linePointers);
++		src += inputConfig_.stride;
++		dst += outputConfig_.stride;
++
++		shiftLinePointers(linePointers, src);
++		stats_->processLine2(y, linePointers);
++		(this->*debayer2_)(dst, linePointers);
++		src += inputConfig_.stride;
++		dst += outputConfig_.stride;
++
++		shiftLinePointers(linePointers, src);
++		(this->*debayer3_)(dst, linePointers);
++		src += inputConfig_.stride;
++		dst += outputConfig_.stride;
++	}
++}
++
++static inline int64_t timeDiff(timespec &after, timespec &before)
++{
++	return (after.tv_sec - before.tv_sec) * 1000000000LL +
++	       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
++}
++
++void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
++{
++	timespec frameStartTime;
++
++	if (measuredFrames_ < DebayerCpu::framesToMeasure) {
++		frameStartTime = {};
++		clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
++	}
++
++	/* Apply DebayerParams */
++	if (params.gamma != gamma_correction_) {
++		for (int i = 0; i < 1024; i++)
++			gamma_[i] = 255 * powf(i / 1023.0, params.gamma);
++
++		gamma_correction_ = params.gamma;
++	}
++
++	for (int i = 0; i < 256; i++) {
++		int idx;
++
++		/* Apply gamma after gain! */
++		idx = std::min({ i * params.gainR / 64U, 1023U });
++		red_[i] = gamma_[idx];
++
++		idx = std::min({ i * params.gainG / 64U, 1023U });
++		green_[i] = gamma_[idx];
++
++		idx = std::min({ i * params.gainB / 64U, 1023U });
++		blue_[i] = gamma_[idx];
++	}
++
++	/* Copy metadata from the input buffer */
++	FrameMetadata &metadata = output->_d()->metadata();
++	metadata.status = input->metadata().status;
++	metadata.sequence = input->metadata().sequence;
++	metadata.timestamp = input->metadata().timestamp;
++
++	MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
++	MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
++	if (!in.isValid() || !out.isValid()) {
++		LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
++		metadata.status = FrameMetadata::FrameError;
++		return;
++	}
++
++	stats_->startFrame();
++
++	if (inputConfig_.patternSize.height == 2)
++		process2(in.planes()[0].data(), out.planes()[0].data());
++	else
++		process4(in.planes()[0].data(), out.planes()[0].data());
++
++	metadata.planes()[0].bytesused = out.planes()[0].size();
++
++	/* Measure before emitting signals */
++	if (measuredFrames_ < DebayerCpu::framesToMeasure &&
++	    ++measuredFrames_ > DebayerCpu::framesToSkip) {
++		timespec frameEndTime = {};
++		clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);
++		frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);
++		if (measuredFrames_ == DebayerCpu::framesToMeasure) {
++			const int measuredFrames = DebayerCpu::framesToMeasure -
++						   DebayerCpu::framesToSkip;
++			LOG(Debayer, Info)
++				<< "Processed " << measuredFrames
++				<< " frames in " << frameProcessTime_ / 1000 << "us, "
++				<< frameProcessTime_ / (1000 * measuredFrames)
++				<< " us/frame";
++		}
++	}
++
++	stats_->finishFrame();
++	outputBufferReady.emit(output);
++	inputBufferReady.emit(input);
++}
++
++} /* namespace libcamera */
+diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
+index d4ae5ac7..6d7a44d7 100644
+--- a/src/libcamera/software_isp/meson.build
++++ b/src/libcamera/software_isp/meson.build
+@@ -2,6 +2,7 @@
+ 
+ libcamera_sources += files([
+ 	'debayer.cpp',
++	'debayer_cpu.cpp',
+ 	'swstats.cpp',
+ 	'swstats_cpu.cpp',
+ ])
+-- 
+2.43.0
+