From 81ea6755bcc96fd95603602fd84ad75ede1e748d Mon Sep 17 00:00:00 2001
From: Eric Naim
Date: Mon, 26 Jan 2026 17:33:28 +0800
Subject: [PATCH 1/9] amd-isp4

Signed-off-by: Eric Naim
---
 Documentation/admin-guide/media/amdisp4-1.rst |   63 +
 Documentation/admin-guide/media/amdisp4.dot   |    6 +
 .../admin-guide/media/v4l-drivers.rst         |    1 +
 MAINTAINERS                                   |   25 +
 drivers/media/platform/Kconfig                |    1 +
 drivers/media/platform/Makefile               |    1 +
 drivers/media/platform/amd/Kconfig            |    3 +
 drivers/media/platform/amd/Makefile           |    3 +
 drivers/media/platform/amd/isp4/Kconfig       |   14 +
 drivers/media/platform/amd/isp4/Makefile      |   10 +
 drivers/media/platform/amd/isp4/isp4.c        |  235 ++++
 drivers/media/platform/amd/isp4/isp4.h        |   20 +
 drivers/media/platform/amd/isp4/isp4_debug.c  |  271 ++++
 drivers/media/platform/amd/isp4/isp4_debug.h  |   41 +
 .../platform/amd/isp4/isp4_fw_cmd_resp.h      |  314 +++++
 drivers/media/platform/amd/isp4/isp4_hw_reg.h |  124 ++
 .../media/platform/amd/isp4/isp4_interface.c  |  789 +++++++++++
 .../media/platform/amd/isp4/isp4_interface.h  |  141 ++
 drivers/media/platform/amd/isp4/isp4_subdev.c | 1057 +++++++++++++++
 drivers/media/platform/amd/isp4/isp4_subdev.h |  131 ++
 drivers/media/platform/amd/isp4/isp4_video.c  | 1165 +++++++++++++++++
 drivers/media/platform/amd/isp4/isp4_video.h  |   65 +
 22 files changed, 4480 insertions(+)
 create mode 100644 Documentation/admin-guide/media/amdisp4-1.rst
 create mode 100644 Documentation/admin-guide/media/amdisp4.dot
 create mode 100644 drivers/media/platform/amd/Kconfig
 create mode 100644 drivers/media/platform/amd/Makefile
 create mode 100644 drivers/media/platform/amd/isp4/Kconfig
 create mode 100644 drivers/media/platform/amd/isp4/Makefile
 create mode 100644 drivers/media/platform/amd/isp4/isp4.c
 create mode 100644 drivers/media/platform/amd/isp4/isp4.h
 create mode 100644 drivers/media/platform/amd/isp4/isp4_debug.c
 create mode 100644 drivers/media/platform/amd/isp4/isp4_debug.h
 create mode 100644 drivers/media/platform/amd/isp4/isp4_fw_cmd_resp.h
 create mode 100644 drivers/media/platform/amd/isp4/isp4_hw_reg.h
 create mode 100644 drivers/media/platform/amd/isp4/isp4_interface.c
 create mode 100644 drivers/media/platform/amd/isp4/isp4_interface.h
 create mode 100644 drivers/media/platform/amd/isp4/isp4_subdev.c
 create mode 100644 drivers/media/platform/amd/isp4/isp4_subdev.h
 create mode 100644 drivers/media/platform/amd/isp4/isp4_video.c
 create mode 100644 drivers/media/platform/amd/isp4/isp4_video.h

diff --git a/Documentation/admin-guide/media/amdisp4-1.rst b/Documentation/admin-guide/media/amdisp4-1.rst
new file mode 100644
index 000000000000..878141154f96
--- /dev/null
+++ b/Documentation/admin-guide/media/amdisp4-1.rst
@@ -0,0 +1,63 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+AMD Image Signal Processor (amdisp4)
+====================================
+
+Introduction
+============
+
+This file documents the driver for the AMD ISP4 that is part of the
+AMD Ryzen AI Max 300 Series.
+
+The driver is located under drivers/media/platform/amd/isp4 and uses
+the Media Controller API.
+
+The driver exposes one video capture device to userspace and provides
+a webcam-like interface. Internally, the video device is connected to
+the isp4 sub-device, which is responsible for communication with the
+CCPU FW.
+
+Topology
+========
+
+.. _amdisp4_topology_graph:
+
+.. kernel-figure:: amdisp4.dot
+    :alt: Diagram of the media pipeline topology
+    :align: center
+
+The driver has one sub-device, representing the isp4 image signal
+processor, and one video device, a capture device for retrieving
+images.
+
+ISP4 Image Signal Processing Sub-device Node
+--------------------------------------------
+
+The isp4 hardware is represented as a single V4L2 sub-device. The
+sub-device does not expose an interface to userspace. It is connected
+to one video node (isp4_capture) through an immutable, enabled link,
+which is its only link, and it represents an ISP with a connected
+sensor, similar to smart cameras (sensors with an integrated ISP).
+The sub-device communicates with the CCPU FW for streaming
+configuration and buffer management.
+
+isp4_capture - Frame Capture Video Node
+---------------------------------------
+
+isp4_capture is a capture device used to capture frames to memory.
+The entity is connected to the isp4 sub-device. The video device
+provides a webcam-like interface to userspace and supports the mmap
+and DMA-BUF memory types.
+
+Capturing Video Frames Example
+==============================
+
+.. code-block:: bash
+
+    v4l2-ctl -d /dev/video0 --set-fmt-video=width=1920,height=1080,pixelformat=NV12 --stream-mmap --stream-count=10
diff --git a/Documentation/admin-guide/media/amdisp4.dot b/Documentation/admin-guide/media/amdisp4.dot
new file mode 100644
index 000000000000..978f30c1a31a
--- /dev/null
+++ b/Documentation/admin-guide/media/amdisp4.dot
@@ -0,0 +1,6 @@
+digraph board {
+	rankdir=TB
+	n00000001 [label="{{} | amd isp4\n | { 0}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000001:port0 -> n00000003 [style=bold]
+	n00000003 [label="Preview\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+}
diff --git a/Documentation/admin-guide/media/v4l-drivers.rst b/Documentation/admin-guide/media/v4l-drivers.rst
index 393f83e8dc4d..0fb88449fffd 100644
--- a/Documentation/admin-guide/media/v4l-drivers.rst
+++ b/Documentation/admin-guide/media/v4l-drivers.rst
@@ -9,6 +9,7 @@ Video4Linux (V4L) driver-specific documentation
 ..
toctree:: :maxdepth: 2 + amdisp4-1 bttv c3-isp cafe_ccic diff --git a/MAINTAINERS b/MAINTAINERS index 67db88b04537..1db969732b8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1153,6 +1153,31 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/amd/ F: include/linux/amd-iommu.h +AMD ISP4 DRIVER +M: Bin Du +M: Nirujogi Pratap +L: linux-media@vger.kernel.org +S: Maintained +T: git git://linuxtv.org/media.git +F: Documentation/admin-guide/media/amdisp4-1.rst +F: Documentation/admin-guide/media/amdisp4.dot +F: drivers/media/platform/amd/Kconfig +F: drivers/media/platform/amd/Makefile +F: drivers/media/platform/amd/isp4/Kconfig +F: drivers/media/platform/amd/isp4/Makefile +F: drivers/media/platform/amd/isp4/isp4.c +F: drivers/media/platform/amd/isp4/isp4.h +F: drivers/media/platform/amd/isp4/isp4_debug.c +F: drivers/media/platform/amd/isp4/isp4_debug.h +F: drivers/media/platform/amd/isp4/isp4_fw_cmd_resp.h +F: drivers/media/platform/amd/isp4/isp4_hw_reg.h +F: drivers/media/platform/amd/isp4/isp4_interface.c +F: drivers/media/platform/amd/isp4/isp4_interface.h +F: drivers/media/platform/amd/isp4/isp4_subdev.c +F: drivers/media/platform/amd/isp4/isp4_subdev.h +F: drivers/media/platform/amd/isp4/isp4_video.c +F: drivers/media/platform/amd/isp4/isp4_video.h + AMD KFD M: Felix Kuehling L: amd-gfx@lists.freedesktop.org diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 3f0b7bb68cc9..0b33e927bd59 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -63,6 +63,7 @@ config VIDEO_MUX # Platform drivers - Please keep it alphabetically sorted source "drivers/media/platform/allegro-dvt/Kconfig" +source "drivers/media/platform/amd/Kconfig" source "drivers/media/platform/amlogic/Kconfig" source "drivers/media/platform/amphion/Kconfig" source "drivers/media/platform/arm/Kconfig" diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile index 6d5f79ddfcc3..16c185752474 100644 --- a/drivers/media/platform/Makefile +++ b/drivers/media/platform/Makefile @@ -6,6 +6,7 @@ # Place here, alphabetically sorted by directory # (e. g. LC_ALL=C sort Makefile) obj-y += allegro-dvt/ +obj-y += amd/ obj-y += amlogic/ obj-y += amphion/ obj-y += arm/ diff --git a/drivers/media/platform/amd/Kconfig b/drivers/media/platform/amd/Kconfig new file mode 100644 index 000000000000..25af49f246b2 --- /dev/null +++ b/drivers/media/platform/amd/Kconfig @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0+ + +source "drivers/media/platform/amd/isp4/Kconfig" diff --git a/drivers/media/platform/amd/Makefile b/drivers/media/platform/amd/Makefile new file mode 100644 index 000000000000..8bfc1955f22e --- /dev/null +++ b/drivers/media/platform/amd/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0+ + +obj-y += isp4/ diff --git a/drivers/media/platform/amd/isp4/Kconfig b/drivers/media/platform/amd/isp4/Kconfig new file mode 100644 index 000000000000..d4e4ad436600 --- /dev/null +++ b/drivers/media/platform/amd/isp4/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0+ + +config AMD_ISP4 + tristate "AMD ISP4 and camera driver" + depends on DRM_AMD_ISP && VIDEO_DEV + select VIDEOBUF2_CORE + select VIDEOBUF2_MEMOPS + select VIDEOBUF2_V4L2 + select VIDEO_V4L2_SUBDEV_API + help + This is support for AMD ISP4 and camera subsystem driver. + Say Y here to enable the ISP4 and camera device for video capture. + To compile this driver as a module, choose M here. The module will + be called amd_capture. 
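Taken together with the documentation above, the capture path can be exercised from userspace with nothing beyond the standard V4L2 UAPI. The following minimal sketch is illustrative and not part of the patch: it assumes the /dev/video0 node and the NV12 1080p format described in amdisp4-1.rst, uses only standard ioctls from linux/videodev2.h, and abbreviates error handling to keep the flow visible:

.. code-block:: c

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <linux/videodev2.h>

    int main(void)
    {
    	struct v4l2_format fmt = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE };
    	struct v4l2_requestbuffers req = { 0 };
    	struct v4l2_buffer buf = { 0 };
    	enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    	void *mem;
    	int fd, i;

    	fd = open("/dev/video0", O_RDWR);
    	if (fd < 0)
    		return 1;

    	/* Negotiate the NV12 1080p format described above */
    	fmt.fmt.pix.width = 1920;
    	fmt.fmt.pix.height = 1080;
    	fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_NV12;
    	fmt.fmt.pix.field = V4L2_FIELD_NONE;
    	if (ioctl(fd, VIDIOC_S_FMT, &fmt) < 0)
    		return 1;

    	/* Request a single mmap buffer and map it */
    	req.count = 1;
    	req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    	req.memory = V4L2_MEMORY_MMAP;
    	if (ioctl(fd, VIDIOC_REQBUFS, &req) < 0)
    		return 1;

    	buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    	buf.memory = V4L2_MEMORY_MMAP;
    	buf.index = 0;
    	if (ioctl(fd, VIDIOC_QUERYBUF, &buf) < 0)
    		return 1;
    	mem = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED,
    		   fd, buf.m.offset);
    	if (mem == MAP_FAILED)
    		return 1;

    	/* Queue the buffer, start streaming, capture ten frames */
    	if (ioctl(fd, VIDIOC_QBUF, &buf) < 0 ||
    	    ioctl(fd, VIDIOC_STREAMON, &type) < 0)
    		return 1;
    	for (i = 0; i < 10; i++) {
    		if (ioctl(fd, VIDIOC_DQBUF, &buf) < 0)
    			break;
    		printf("frame %d: %u bytes\n", i, buf.bytesused);
    		ioctl(fd, VIDIOC_QBUF, &buf);
    	}
    	ioctl(fd, VIDIOC_STREAMOFF, &type);
    	munmap(mem, buf.length);
    	close(fd);
    	return 0;
    }

This mirrors what the documented v4l2-ctl invocation does internally: set format, allocate and map one MMAP buffer, then stream ten frames.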
diff --git a/drivers/media/platform/amd/isp4/Makefile b/drivers/media/platform/amd/isp4/Makefile
new file mode 100644
index 000000000000..607151c0a2be
--- /dev/null
+++ b/drivers/media/platform/amd/isp4/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2025 Advanced Micro Devices, Inc.
+
+obj-$(CONFIG_AMD_ISP4) += amd_capture.o
+amd_capture-objs := isp4.o \
+		    isp4_debug.o \
+		    isp4_interface.o \
+		    isp4_subdev.o \
+		    isp4_video.o
diff --git a/drivers/media/platform/amd/isp4/isp4.c b/drivers/media/platform/amd/isp4/isp4.c
new file mode 100644
index 000000000000..93cb2d41320e
--- /dev/null
+++ b/drivers/media/platform/amd/isp4/isp4.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "isp4.h"
+#include "isp4_debug.h"
+#include "isp4_hw_reg.h"
+
+#define ISP4_DRV_NAME "amd_isp_capture"
+#define ISP4_FW_RESP_RB_IRQ_STATUS_MASK \
+	(ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT9_INT_MASK | \
+	 ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT12_INT_MASK)
+
+static const struct {
+	const char *name;
+	u32 status_mask;
+	u32 en_mask;
+	u32 ack_mask;
+	u32 rb_int_num;
+} isp4_irq[ISP4SD_MAX_FW_RESP_STREAM_NUM] = {
+	/* The IRQ order is aligned with the isp4_subdev.fw_resp_thread order */
+	{
+		.name = "isp_irq_global",
+		.status_mask = ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT12_INT_MASK,
+		.en_mask = ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT12_EN_MASK,
+		.ack_mask = ISP_SYS_INT0_ACK__SYS_INT_RINGBUFFER_WPT12_ACK_MASK,
+		.rb_int_num = 4, /* ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12 */
+	},
+	{
+		.name = "isp_irq_stream1",
+		.status_mask = ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT9_INT_MASK,
+		.en_mask = ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT9_EN_MASK,
+		.ack_mask = ISP_SYS_INT0_ACK__SYS_INT_RINGBUFFER_WPT9_ACK_MASK,
+		.rb_int_num = 0, /* ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9 */
+	},
+};
+
+void isp4_intr_enable(struct isp4_subdev *isp_subdev, u32 index, bool enable)
+{
+	u32 intr_en;
+
+	/* Synchronize ISP_SYS_INT0_EN writes with the IRQ handler's writes */
+	spin_lock_irq(&isp_subdev->irq_lock);
+	intr_en = isp4hw_rreg(isp_subdev->mmio, ISP_SYS_INT0_EN);
+	if (enable)
+		intr_en |= isp4_irq[index].en_mask;
+	else
+		intr_en &= ~isp4_irq[index].en_mask;
+
+	isp4hw_wreg(isp_subdev->mmio, ISP_SYS_INT0_EN, intr_en);
+	spin_unlock_irq(&isp_subdev->irq_lock);
+}
+
+static void isp4_wake_up_resp_thread(struct isp4_subdev *isp_subdev, u32 index)
+{
+	struct isp4sd_thread_handler *thread_ctx = &isp_subdev->fw_resp_thread[index];
+
+	thread_ctx->resp_ready = true;
+	wake_up_interruptible(&thread_ctx->waitq);
+}
+
+static irqreturn_t isp4_irq_handler(int irq, void *arg)
+{
+	struct isp4_subdev *isp_subdev = arg;
+	u32 intr_ack = 0, intr_en = 0, intr_status;
+	int seen = 0;
+
+	/* Get the ISP_SYS interrupt status */
+	intr_status = isp4hw_rreg(isp_subdev->mmio, ISP_SYS_INT0_STATUS);
+	intr_status &= ISP4_FW_RESP_RB_IRQ_STATUS_MASK;
+
+	/* Find which ISP_SYS interrupts fired */
+	for (size_t i = 0; i < ARRAY_SIZE(isp4_irq); i++) {
+		if (intr_status & isp4_irq[i].status_mask) {
+			intr_ack |= isp4_irq[i].ack_mask;
+			intr_en |= isp4_irq[i].en_mask;
+			seen |= BIT(i);
+		}
+	}
+
+	/*
+	 * Disable the ISP_SYS interrupts that fired. Must be done before waking
+	 * the response threads, since they re-enable interrupts when finished.
+	 * The lock synchronizes RMW of INT0_EN with isp4_intr_enable().
+ */ + spin_lock(&isp_subdev->irq_lock); + intr_en = isp4hw_rreg(isp_subdev->mmio, ISP_SYS_INT0_EN) & ~intr_en; + isp4hw_wreg(isp_subdev->mmio, ISP_SYS_INT0_EN, intr_en); + spin_unlock(&isp_subdev->irq_lock); + + /* + * Clear the ISP_SYS interrupts. This must be done after the interrupts + * are disabled, so that ISP FW won't flag any new interrupts on these + * streams, and thus we don't need to clear interrupts again before + * re-enabling them in the response thread. + */ + isp4hw_wreg(isp_subdev->mmio, ISP_SYS_INT0_ACK, intr_ack); + + /* Wake up the response threads */ + for (int i; (i = ffs(seen)); seen = (seen >> i) << i) + isp4_wake_up_resp_thread(isp_subdev, i - 1); + + return IRQ_HANDLED; +} + +static int isp4_capture_probe(struct platform_device *pdev) +{ + int irq[ISP4SD_MAX_FW_RESP_STREAM_NUM]; + struct device *dev = &pdev->dev; + struct isp4_subdev *isp_subdev; + struct isp4_device *isp_dev; + size_t i; + int ret; + + isp_dev = devm_kzalloc(dev, sizeof(*isp_dev), GFP_KERNEL); + if (!isp_dev) + return -ENOMEM; + + dev->init_name = ISP4_DRV_NAME; + + isp_subdev = &isp_dev->isp_subdev; + isp_subdev->mmio = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(isp_subdev->mmio)) + return dev_err_probe(dev, PTR_ERR(isp_subdev->mmio), "isp ioremap fail\n"); + + for (i = 0; i < ARRAY_SIZE(isp4_irq); i++) { + irq[i] = platform_get_irq(pdev, isp4_irq[i].rb_int_num); + if (irq[i] < 0) + return dev_err_probe(dev, irq[i], "fail to get irq %d\n", + isp4_irq[i].rb_int_num); + + ret = devm_request_irq(dev, irq[i], isp4_irq_handler, + IRQF_NO_AUTOEN, isp4_irq[i].name, isp_subdev); + if (ret) + return dev_err_probe(dev, ret, "fail to req irq %d\n", irq[i]); + } + + /* Link the media device within the v4l2_device */ + isp_dev->v4l2_dev.mdev = &isp_dev->mdev; + + /* Initialize media device */ + strscpy(isp_dev->mdev.model, "amd_isp41_mdev", sizeof(isp_dev->mdev.model)); + snprintf(isp_dev->mdev.bus_info, sizeof(isp_dev->mdev.bus_info), + "platform:%s", ISP4_DRV_NAME); + isp_dev->mdev.dev = dev; + media_device_init(&isp_dev->mdev); + + /* register v4l2 device */ + snprintf(isp_dev->v4l2_dev.name, sizeof(isp_dev->v4l2_dev.name), + "AMD-V4L2-ROOT"); + ret = v4l2_device_register(dev, &isp_dev->v4l2_dev); + if (ret) { + dev_err_probe(dev, ret, "fail register v4l2 device\n"); + goto err_clean_media; + } + + pm_runtime_set_suspended(dev); + pm_runtime_enable(dev); + spin_lock_init(&isp_subdev->irq_lock); + ret = isp4sd_init(&isp_dev->isp_subdev, &isp_dev->v4l2_dev, irq); + if (ret) { + dev_err_probe(dev, ret, "fail init isp4 sub dev\n"); + goto err_pm_disable; + } + + ret = media_create_pad_link(&isp_dev->isp_subdev.sdev.entity, + 0, &isp_dev->isp_subdev.isp_vdev.vdev.entity, + 0, + MEDIA_LNK_FL_ENABLED | + MEDIA_LNK_FL_IMMUTABLE); + if (ret) { + dev_err_probe(dev, ret, "fail to create pad link\n"); + goto err_isp4_deinit; + } + + ret = media_device_register(&isp_dev->mdev); + if (ret) { + dev_err_probe(dev, ret, "fail to register media device\n"); + goto err_isp4_deinit; + } + + platform_set_drvdata(pdev, isp_dev); + isp_debugfs_create(isp_dev); + + return 0; + +err_isp4_deinit: + isp4sd_deinit(&isp_dev->isp_subdev); +err_pm_disable: + pm_runtime_disable(dev); + v4l2_device_unregister(&isp_dev->v4l2_dev); +err_clean_media: + media_device_cleanup(&isp_dev->mdev); + + return ret; +} + +static void isp4_capture_remove(struct platform_device *pdev) +{ + struct isp4_device *isp_dev = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; + + isp_debugfs_remove(isp_dev); + + 
media_device_unregister(&isp_dev->mdev); + isp4sd_deinit(&isp_dev->isp_subdev); + pm_runtime_disable(dev); + v4l2_device_unregister(&isp_dev->v4l2_dev); + media_device_cleanup(&isp_dev->mdev); +} + +static struct platform_driver isp4_capture_drv = { + .probe = isp4_capture_probe, + .remove = isp4_capture_remove, + .driver = { + .name = ISP4_DRV_NAME, + } +}; + +module_platform_driver(isp4_capture_drv); + +MODULE_ALIAS("platform:" ISP4_DRV_NAME); +MODULE_IMPORT_NS("DMA_BUF"); + +MODULE_DESCRIPTION("AMD ISP4 Driver"); +MODULE_AUTHOR("Bin Du "); +MODULE_AUTHOR("Pratap Nirujogi "); +MODULE_LICENSE("GPL"); diff --git a/drivers/media/platform/amd/isp4/isp4.h b/drivers/media/platform/amd/isp4/isp4.h new file mode 100644 index 000000000000..2db6683d6d8b --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef _ISP4_H_ +#define _ISP4_H_ + +#include +#include "isp4_subdev.h" + +struct isp4_device { + struct v4l2_device v4l2_dev; + struct isp4_subdev isp_subdev; + struct media_device mdev; +}; + +void isp4_intr_enable(struct isp4_subdev *isp_subdev, u32 index, bool enable); + +#endif /* _ISP4_H_ */ diff --git a/drivers/media/platform/amd/isp4/isp4_debug.c b/drivers/media/platform/amd/isp4/isp4_debug.c new file mode 100644 index 000000000000..c6d957ea9132 --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_debug.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#include "isp4.h" +#include "isp4_debug.h" +#include "isp4_hw_reg.h" +#include "isp4_interface.h" + +#define ISP4DBG_FW_LOG_RINGBUF_SIZE (2 * 1024 * 1024) +#define ISP4DBG_MACRO_2_STR(X) #X +#define ISP4DBG_MAX_ONE_TIME_LOG_LEN 510 + +#ifdef CONFIG_DEBUG_FS + +void isp_debugfs_create(struct isp4_device *isp_dev) +{ + isp_dev->isp_subdev.debugfs_dir = debugfs_create_dir("amd_isp", NULL); + debugfs_create_bool("fw_log_enable", 0644, + isp_dev->isp_subdev.debugfs_dir, + &isp_dev->isp_subdev.enable_fw_log); + isp_dev->isp_subdev.fw_log_output = + devm_kzalloc(isp_dev->isp_subdev.dev, + ISP4DBG_FW_LOG_RINGBUF_SIZE + 32, + GFP_KERNEL); +} + +void isp_debugfs_remove(struct isp4_device *isp_dev) +{ + debugfs_remove_recursive(isp_dev->isp_subdev.debugfs_dir); + isp_dev->isp_subdev.debugfs_dir = NULL; +} + +static u32 isp_fw_fill_rb_log(struct isp4_subdev *isp, void *sys, u32 rb_size) +{ + struct isp4_interface *ispif = &isp->ispif; + char *buf = isp->fw_log_output; + struct device *dev = isp->dev; + u32 rd_ptr, wr_ptr; + u32 total_cnt = 0; + u32 offset = 0; + u32 cnt; + + if (!sys || !rb_size) + return 0; + + guard(mutex)(&ispif->isp4if_mutex); + + rd_ptr = isp4hw_rreg(isp->mmio, ISP_LOG_RB_RPTR0); + wr_ptr = isp4hw_rreg(isp->mmio, ISP_LOG_RB_WPTR0); + + do { + if (wr_ptr > rd_ptr) + cnt = wr_ptr - rd_ptr; + else if (wr_ptr < rd_ptr) + cnt = rb_size - rd_ptr; + else + goto quit; + + if (cnt > rb_size) { + dev_err(dev, "fail bad fw log size %u\n", cnt); + goto quit; + } + + memcpy(buf + offset, sys + rd_ptr, cnt); + + offset += cnt; + total_cnt += cnt; + rd_ptr = (rd_ptr + cnt) % rb_size; + } while (rd_ptr < wr_ptr); + + isp4hw_wreg(isp->mmio, ISP_LOG_RB_RPTR0, rd_ptr); + +quit: + return total_cnt; +} + +void isp_fw_log_print(struct isp4_subdev *isp) +{ + struct isp4_interface *ispif = &isp->ispif; + char *fw_log_buf = isp->fw_log_output; + u32 cnt; + + if (!isp->enable_fw_log || !fw_log_buf) + return; + + cnt = isp_fw_fill_rb_log(isp, 
ispif->fw_log_buf->sys_addr, + ispif->fw_log_buf->mem_size); + + if (cnt) { + char *line_end; + char temp_ch; + char *str; + char *end; + + str = (char *)fw_log_buf; + end = ((char *)fw_log_buf + cnt); + fw_log_buf[cnt] = 0; + + while (str < end) { + line_end = strchr(str, 0x0A); + if ((line_end && (str + ISP4DBG_MAX_ONE_TIME_LOG_LEN) >= line_end) || + (!line_end && (str + ISP4DBG_MAX_ONE_TIME_LOG_LEN) >= end)) { + if (line_end) + *line_end = 0; + + if (*str != '\0') + dev_dbg(isp->dev, + "%s", str); + + if (line_end) { + *line_end = 0x0A; + str = line_end + 1; + } else { + break; + } + } else { + u32 tmp_len = ISP4DBG_MAX_ONE_TIME_LOG_LEN; + + temp_ch = str[tmp_len]; + str[tmp_len] = 0; + dev_dbg(isp->dev, "%s", str); + str[tmp_len] = temp_ch; + str = &str[tmp_len]; + } + } + } +} +#endif + +char *isp4dbg_get_buf_src_str(u32 src) +{ + switch (src) { + case BUFFER_SOURCE_STREAM: + return ISP4DBG_MACRO_2_STR(BUFFER_SOURCE_STREAM); + default: + return "Unknown buf source"; + } +} + +char *isp4dbg_get_buf_done_str(u32 status) +{ + switch (status) { + case BUFFER_STATUS_INVALID: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_INVALID); + case BUFFER_STATUS_SKIPPED: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_SKIPPED); + case BUFFER_STATUS_EXIST: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_EXIST); + case BUFFER_STATUS_DONE: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_DONE); + case BUFFER_STATUS_LACK: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_LACK); + case BUFFER_STATUS_DIRTY: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_DIRTY); + case BUFFER_STATUS_MAX: + return ISP4DBG_MACRO_2_STR(BUFFER_STATUS_MAX); + default: + return "Unknown Buf Done Status"; + } +} + +char *isp4dbg_get_img_fmt_str(int fmt /* enum isp4fw_image_format * */) +{ + switch (fmt) { + case IMAGE_FORMAT_NV12: + return "NV12"; + case IMAGE_FORMAT_YUV422INTERLEAVED: + return "YUV422INTERLEAVED"; + default: + return "unknown fmt"; + } +} + +void isp4dbg_show_bufmeta_info(struct device *dev, char *pre, + void *in, void *orig_buf) +{ + struct isp4fw_buffer_meta_info *p; + struct isp4if_img_buf_info *orig; + + if (!in) + return; + + if (!pre) + pre = ""; + + p = in; + orig = orig_buf; + + dev_dbg(dev, "%s(%s) en:%d,stat:%s(%u),src:%s\n", pre, + isp4dbg_get_img_fmt_str(p->image_prop.image_format), + p->enabled, isp4dbg_get_buf_done_str(p->status), p->status, + isp4dbg_get_buf_src_str(p->source)); + + dev_dbg(dev, "%p,0x%llx(%u) %p,0x%llx(%u) %p,0x%llx(%u)\n", + orig->planes[0].sys_addr, orig->planes[0].mc_addr, + orig->planes[0].len, orig->planes[1].sys_addr, + orig->planes[1].mc_addr, orig->planes[1].len, + orig->planes[2].sys_addr, orig->planes[2].mc_addr, + orig->planes[2].len); +} + +char *isp4dbg_get_buf_type(u32 type) +{ + /* enum isp4fw_buffer_type */ + switch (type) { + case BUFFER_TYPE_PREVIEW: + return ISP4DBG_MACRO_2_STR(BUFFER_TYPE_PREVIEW); + case BUFFER_TYPE_META_INFO: + return ISP4DBG_MACRO_2_STR(BUFFER_TYPE_META_INFO); + case BUFFER_TYPE_MEM_POOL: + return ISP4DBG_MACRO_2_STR(BUFFER_TYPE_MEM_POOL); + default: + return "unknown type"; + } +} + +char *isp4dbg_get_cmd_str(u32 cmd) +{ + switch (cmd) { + case CMD_ID_START_STREAM: + return ISP4DBG_MACRO_2_STR(CMD_ID_START_STREAM); + case CMD_ID_STOP_STREAM: + return ISP4DBG_MACRO_2_STR(CMD_ID_STOP_STREAM); + case CMD_ID_SEND_BUFFER: + return ISP4DBG_MACRO_2_STR(CMD_ID_SEND_BUFFER); + case CMD_ID_SET_STREAM_CONFIG: + return ISP4DBG_MACRO_2_STR(CMD_ID_SET_STREAM_CONFIG); + case CMD_ID_SET_OUT_CHAN_PROP: + return ISP4DBG_MACRO_2_STR(CMD_ID_SET_OUT_CHAN_PROP); + case CMD_ID_ENABLE_OUT_CHAN: 
+ return ISP4DBG_MACRO_2_STR(CMD_ID_ENABLE_OUT_CHAN); + default: + return "unknown cmd"; + } +} + +char *isp4dbg_get_resp_str(u32 cmd) +{ + switch (cmd) { + case RESP_ID_CMD_DONE: + return ISP4DBG_MACRO_2_STR(RESP_ID_CMD_DONE); + case RESP_ID_NOTI_FRAME_DONE: + return ISP4DBG_MACRO_2_STR(RESP_ID_NOTI_FRAME_DONE); + default: + return "unknown respid"; + } +} + +char *isp4dbg_get_if_stream_str(u32 stream /* enum fw_cmd_resp_stream_id */) +{ + switch (stream) { + case ISP4IF_STREAM_ID_GLOBAL: + return "STREAM_GLOBAL"; + case ISP4IF_STREAM_ID_1: + return "STREAM1"; + default: + return "unknown streamID"; + } +} + +char *isp4dbg_get_out_ch_str(int ch /* enum isp4fw_pipe_out_ch */) +{ + switch ((enum isp4fw_pipe_out_ch)ch) { + case ISP_PIPE_OUT_CH_PREVIEW: + return "prev"; + default: + return "unknown channel"; + } +} diff --git a/drivers/media/platform/amd/isp4/isp4_debug.h b/drivers/media/platform/amd/isp4/isp4_debug.h new file mode 100644 index 000000000000..1a13762af502 --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_debug.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef _ISP4_DEBUG_H_ +#define _ISP4_DEBUG_H_ + +#include +#include + +#include "isp4_subdev.h" + +#ifdef CONFIG_DEBUG_FS +struct isp4_device; + +void isp_debugfs_create(struct isp4_device *isp_dev); +void isp_debugfs_remove(struct isp4_device *isp_dev); +void isp_fw_log_print(struct isp4_subdev *isp); + +#else + +/* to avoid checkpatch warning */ +#define isp_debugfs_create(cam) ((void)(cam)) +#define isp_debugfs_remove(cam) ((void)(cam)) +#define isp_fw_log_print(isp) ((void)(isp)) + +#endif /* CONFIG_DEBUG_FS */ + +void isp4dbg_show_bufmeta_info(struct device *dev, char *pre, void *p, + void *orig_buf /* struct sys_img_buf_handle */); +char *isp4dbg_get_img_fmt_str(int fmt /* enum _image_format_t */); +char *isp4dbg_get_out_ch_str(int ch /* enum _isp_pipe_out_ch_t */); +char *isp4dbg_get_cmd_str(u32 cmd); +char *isp4dbg_get_buf_type(u32 type);/* enum _buffer_type_t */ +char *isp4dbg_get_resp_str(u32 resp); +char *isp4dbg_get_buf_src_str(u32 src); +char *isp4dbg_get_buf_done_str(u32 status); +char *isp4dbg_get_if_stream_str(u32 stream); + +#endif /* _ISP4_DEBUG_H_ */ diff --git a/drivers/media/platform/amd/isp4/isp4_fw_cmd_resp.h b/drivers/media/platform/amd/isp4/isp4_fw_cmd_resp.h new file mode 100644 index 000000000000..d571b3873edb --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_fw_cmd_resp.h @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef _ISP4_CMD_RESP_H_ +#define _ISP4_CMD_RESP_H_ + +/* + * @brief Host and Firmware command & response channel. + * Two types of command/response channel. + * Type Global Command has one command/response channel. + * Type Stream Command has one command/response channel. 
+ *----------- ------------ + *| | --------------------------- | | + *| | ---->| Global Command |----> | | + *| | --------------------------- | | + *| | | | + *| | | | + *| | --------------------------- | | + *| | ---->| Stream Command |----> | | + *| | --------------------------- | | + *| | | | + *| | | | + *| | | | + *| HOST | | Firmware | + *| | | | + *| | | | + *| | -------------------------- | | + *| | <----| Global Response |<---- | | + *| | -------------------------- | | + *| | | | + *| | | | + *| | -------------------------- | | + *| | <----| Stream Response |<---- | | + *| | -------------------------- | | + *| | | | + *| | | | + *----------- ------------ + */ + +/* + * @brief command ID format + * cmd_id is in the format of following type: + * type: indicate command type, global/stream commands. + * group: indicate the command group. + * id: A unique command identification in one type and group. + * |<-Bit31 ~ Bit24->|<-Bit23 ~ Bit16->|<-Bit15 ~ Bit0->| + * | type | group | id | + */ + +#define CMD_TYPE_SHIFT 24 +#define CMD_GROUP_SHIFT 16 +#define CMD_TYPE_STREAM_CTRL (0x2U << CMD_TYPE_SHIFT) + +#define CMD_GROUP_STREAM_CTRL (0x1U << CMD_GROUP_SHIFT) +#define CMD_GROUP_STREAM_BUFFER (0x4U << CMD_GROUP_SHIFT) + +/* Stream Command */ +#define CMD_ID_SET_STREAM_CONFIG (CMD_TYPE_STREAM_CTRL | CMD_GROUP_STREAM_CTRL | 0x1) +#define CMD_ID_SET_OUT_CHAN_PROP (CMD_TYPE_STREAM_CTRL | CMD_GROUP_STREAM_CTRL | 0x3) +#define CMD_ID_ENABLE_OUT_CHAN (CMD_TYPE_STREAM_CTRL | CMD_GROUP_STREAM_CTRL | 0x5) +#define CMD_ID_START_STREAM (CMD_TYPE_STREAM_CTRL | CMD_GROUP_STREAM_CTRL | 0x7) +#define CMD_ID_STOP_STREAM (CMD_TYPE_STREAM_CTRL | CMD_GROUP_STREAM_CTRL | 0x8) + +/* Stream Buffer Command */ +#define CMD_ID_SEND_BUFFER (CMD_TYPE_STREAM_CTRL | CMD_GROUP_STREAM_BUFFER | 0x1) + +/* + * @brief response ID format + * resp_id is in the format of following type: + * type: indicate command type, global/stream commands. + * group: indicate the command group. + * id: A unique command identification in one type and group. + * |<-Bit31 ~ Bit24->|<-Bit23 ~ Bit16->|<-Bit15 ~ Bit0->| + * | type | group | id | + */ + +#define RESP_GROUP_SHIFT 16 +#define RESP_GROUP_MASK (0xff << RESP_GROUP_SHIFT) + +#define GET_RESP_GROUP_VALUE(resp_id) (((resp_id) & RESP_GROUP_MASK) >> RESP_GROUP_SHIFT) +#define GET_RESP_ID_VALUE(resp_id) ((resp_id) & 0xffff) + +#define RESP_GROUP_GENERAL (0x1 << RESP_GROUP_SHIFT) +#define RESP_GROUP_NOTIFICATION (0x3 << RESP_GROUP_SHIFT) + +/* General Response */ +#define RESP_ID_CMD_DONE (RESP_GROUP_GENERAL | 0x1) + +/* Notification */ +#define RESP_ID_NOTI_FRAME_DONE (RESP_GROUP_NOTIFICATION | 0x1) + +#define CMD_STATUS_SUCCESS 0 +#define CMD_STATUS_FAIL 1 +#define CMD_STATUS_SKIPPED 2 + +#define ADDR_SPACE_TYPE_GPU_VA 4 + +#define FW_MEMORY_POOL_SIZE (100 * 1024 * 1024) + +/* + * standard ISP mipicsi=>isp + */ +#define MIPI0_ISP_PIPELINE_ID 0x5f91 + +enum isp4fw_sensor_id { + SENSOR_ID_ON_MIPI0 = 0, /* Sensor id for ISP input from MIPI port 0 */ +}; + +enum isp4fw_stream_id { + STREAM_ID_INVALID = -1, /* STREAM_ID_INVALID. */ + STREAM_ID_1 = 0, /* STREAM_ID_1. */ + STREAM_ID_2 = 1, /* STREAM_ID_2. */ + STREAM_ID_3 = 2, /* STREAM_ID_3. */ + STREAM_ID_MAXIMUM /* STREAM_ID_MAXIMUM. 
*/ +}; + +enum isp4fw_image_format { + IMAGE_FORMAT_NV12 = 1, /* 4:2:0,semi-planar, 8-bit */ + IMAGE_FORMAT_YUV422INTERLEAVED = 7, /* interleave, 4:2:2, 8-bit */ +}; + +enum isp4fw_pipe_out_ch { + ISP_PIPE_OUT_CH_PREVIEW = 0, +}; + +enum isp4fw_yuv_range { + ISP_YUV_RANGE_FULL = 0, /* YUV value range in 0~255 */ + ISP_YUV_RANGE_NARROW = 1, /* YUV value range in 16~235 */ + ISP_YUV_RANGE_MAX +}; + +enum isp4fw_buffer_type { + BUFFER_TYPE_PREVIEW = 8, + BUFFER_TYPE_META_INFO = 10, + BUFFER_TYPE_MEM_POOL = 15, +}; + +enum isp4fw_buffer_status { + BUFFER_STATUS_INVALID, /* The buffer is INVALID */ + BUFFER_STATUS_SKIPPED, /* The buffer is not filled with image data */ + BUFFER_STATUS_EXIST, /* The buffer is exist and waiting for filled */ + BUFFER_STATUS_DONE, /* The buffer is filled with image data */ + BUFFER_STATUS_LACK, /* The buffer is unavailable */ + BUFFER_STATUS_DIRTY, /* The buffer is dirty, probably caused by + * LMI leakage + */ + BUFFER_STATUS_MAX /* The buffer STATUS_MAX */ +}; + +enum isp4fw_buffer_source { + /* The buffer is from the stream buffer queue */ + BUFFER_SOURCE_STREAM, +}; + +struct isp4fw_error_code { + u32 code1; + u32 code2; + u32 code3; + u32 code4; + u32 code5; +}; + +/* + * Command Structure for FW + */ + +struct isp4fw_cmd { + u32 cmd_seq_num; + u32 cmd_id; + u32 cmd_param[12]; + u16 cmd_stream_id; + u8 cmd_silent_resp; + u8 reserved; + u32 cmd_check_sum; +}; + +struct isp4fw_resp_cmd_done { + /* + * The host2fw command seqNum. + * To indicate which command this response refers to. + */ + u32 cmd_seq_num; + /* The host2fw command id for host double check. */ + u32 cmd_id; + /* + * Indicate the command process status. + * 0 means success. 1 means fail. 2 means skipped + */ + u16 cmd_status; + /* + * If the cmd_status is 1, that means the command is processed fail, + * host can check the isp4fw_error_code to get the details + * error information + */ + u16 isp4fw_error_code; + /* The response payload will be in different struct type */ + /* according to different cmd done response. */ + u8 payload[36]; +}; + +struct isp4fw_resp_param_package { + u32 package_addr_lo; /* The low 32 bit addr of the pkg address. */ + u32 package_addr_hi; /* The high 32 bit addr of the pkg address. */ + u32 package_size; /* The total pkg size in bytes. */ + u32 package_check_sum; /* The byte sum of the pkg. 
*/ +}; + +struct isp4fw_resp { + u32 resp_seq_num; + u32 resp_id; + union { + struct isp4fw_resp_cmd_done cmd_done; + struct isp4fw_resp_param_package frame_done; + u32 resp_param[12]; + } param; + u8 reserved[4]; + u32 resp_check_sum; +}; + +struct isp4fw_mipi_pipe_path_cfg { + u32 b_enable; + enum isp4fw_sensor_id isp4fw_sensor_id; +}; + +struct isp4fw_isp_pipe_path_cfg { + u32 isp_pipe_id; /* pipe ids for pipeline construction */ +}; + +struct isp4fw_isp_stream_cfg { + /* Isp mipi path */ + struct isp4fw_mipi_pipe_path_cfg mipi_pipe_path_cfg; + /* Isp pipe path */ + struct isp4fw_isp_pipe_path_cfg isp_pipe_path_cfg; + /* enable TNR */ + u32 b_enable_tnr; + /* + * number of frame rta per-processing, + * set to 0 to use fw default value + */ + u32 rta_frames_per_proc; +}; + +struct isp4fw_image_prop { + enum isp4fw_image_format image_format; /* Image format */ + u32 width; /* Width */ + u32 height; /* Height */ + u32 luma_pitch; /* Luma pitch */ + u32 chroma_pitch; /* Chrom pitch */ + enum isp4fw_yuv_range yuv_range; /* YUV value range */ +}; + +struct isp4fw_buffer { + /* A check num for debug usage, host need to */ + /* set the buf_tags to different number */ + u32 buf_tags; + union { + u32 value; + struct { + u32 space : 16; + u32 vmid : 16; + } bit; + } vmid_space; + u32 buf_base_a_lo; /* Low address of buffer A */ + u32 buf_base_a_hi; /* High address of buffer A */ + u32 buf_size_a; /* Buffer size of buffer A */ + + u32 buf_base_b_lo; /* Low address of buffer B */ + u32 buf_base_b_hi; /* High address of buffer B */ + u32 buf_size_b; /* Buffer size of buffer B */ + + u32 buf_base_c_lo; /* Low address of buffer C */ + u32 buf_base_c_hi; /* High address of buffer C */ + u32 buf_size_c; /* Buffer size of buffer C */ +}; + +struct isp4fw_buffer_meta_info { + u32 enabled; /* enabled flag */ + enum isp4fw_buffer_status status; /* BufferStatus */ + struct isp4fw_error_code err; /* err code */ + enum isp4fw_buffer_source source; /* BufferSource */ + struct isp4fw_image_prop image_prop; /* image_prop */ + struct isp4fw_buffer buffer; /* buffer */ +}; + +struct isp4fw_meta_info { + u32 poc; /* frame id */ + u32 fc_id; /* frame ctl id */ + u32 time_stamp_lo; /* time_stamp_lo */ + u32 time_stamp_hi; /* time_stamp_hi */ + struct isp4fw_buffer_meta_info preview; /* preview BufferMetaInfo */ +}; + +struct isp4fw_cmd_send_buffer { + enum isp4fw_buffer_type buffer_type; /* buffer Type */ + struct isp4fw_buffer buffer; /* buffer info */ +}; + +struct isp4fw_cmd_set_out_ch_prop { + enum isp4fw_pipe_out_ch ch; /* ISP pipe out channel */ + struct isp4fw_image_prop image_prop; /* image property */ +}; + +struct isp4fw_cmd_enable_out_ch { + enum isp4fw_pipe_out_ch ch; /* ISP pipe out channel */ + u32 is_enable; /* If enable channel or not */ +}; + +struct isp4fw_cmd_set_stream_cfg { + struct isp4fw_isp_stream_cfg stream_cfg; /* stream path config */ +}; + +#endif /* _ISP4_CMD_RESP_H_ */ diff --git a/drivers/media/platform/amd/isp4/isp4_hw_reg.h b/drivers/media/platform/amd/isp4/isp4_hw_reg.h new file mode 100644 index 000000000000..09c76f75c5ee --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_hw_reg.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. 
+ */ + +#ifndef _ISP4_HW_REG_H_ +#define _ISP4_HW_REG_H_ + +#include + +#define ISP_SOFT_RESET 0x62000 +#define ISP_SYS_INT0_EN 0x62010 +#define ISP_SYS_INT0_STATUS 0x62014 +#define ISP_SYS_INT0_ACK 0x62018 +#define ISP_CCPU_CNTL 0x62054 +#define ISP_STATUS 0x62058 +#define ISP_LOG_RB_BASE_LO0 0x62148 +#define ISP_LOG_RB_BASE_HI0 0x6214c +#define ISP_LOG_RB_SIZE0 0x62150 +#define ISP_LOG_RB_RPTR0 0x62154 +#define ISP_LOG_RB_WPTR0 0x62158 +#define ISP_RB_BASE_LO1 0x62170 +#define ISP_RB_BASE_HI1 0x62174 +#define ISP_RB_SIZE1 0x62178 +#define ISP_RB_RPTR1 0x6217c +#define ISP_RB_WPTR1 0x62180 +#define ISP_RB_BASE_LO2 0x62184 +#define ISP_RB_BASE_HI2 0x62188 +#define ISP_RB_SIZE2 0x6218c +#define ISP_RB_RPTR2 0x62190 +#define ISP_RB_WPTR2 0x62194 +#define ISP_RB_BASE_LO3 0x62198 +#define ISP_RB_BASE_HI3 0x6219c +#define ISP_RB_SIZE3 0x621a0 +#define ISP_RB_RPTR3 0x621a4 +#define ISP_RB_WPTR3 0x621a8 +#define ISP_RB_BASE_LO4 0x621ac +#define ISP_RB_BASE_HI4 0x621b0 +#define ISP_RB_SIZE4 0x621b4 +#define ISP_RB_RPTR4 0x621b8 +#define ISP_RB_WPTR4 0x621bc +#define ISP_RB_BASE_LO5 0x621c0 +#define ISP_RB_BASE_HI5 0x621c4 +#define ISP_RB_SIZE5 0x621c8 +#define ISP_RB_RPTR5 0x621cc +#define ISP_RB_WPTR5 0x621d0 +#define ISP_RB_BASE_LO6 0x621d4 +#define ISP_RB_BASE_HI6 0x621d8 +#define ISP_RB_SIZE6 0x621dc +#define ISP_RB_RPTR6 0x621e0 +#define ISP_RB_WPTR6 0x621e4 +#define ISP_RB_BASE_LO7 0x621e8 +#define ISP_RB_BASE_HI7 0x621ec +#define ISP_RB_SIZE7 0x621f0 +#define ISP_RB_RPTR7 0x621f4 +#define ISP_RB_WPTR7 0x621f8 +#define ISP_RB_BASE_LO8 0x621fc +#define ISP_RB_BASE_HI8 0x62200 +#define ISP_RB_SIZE8 0x62204 +#define ISP_RB_RPTR8 0x62208 +#define ISP_RB_WPTR8 0x6220c +#define ISP_RB_BASE_LO9 0x62210 +#define ISP_RB_BASE_HI9 0x62214 +#define ISP_RB_SIZE9 0x62218 +#define ISP_RB_RPTR9 0x6221c +#define ISP_RB_WPTR9 0x62220 +#define ISP_RB_BASE_LO10 0x62224 +#define ISP_RB_BASE_HI10 0x62228 +#define ISP_RB_SIZE10 0x6222c +#define ISP_RB_RPTR10 0x62230 +#define ISP_RB_WPTR10 0x62234 +#define ISP_RB_BASE_LO11 0x62238 +#define ISP_RB_BASE_HI11 0x6223c +#define ISP_RB_SIZE11 0x62240 +#define ISP_RB_RPTR11 0x62244 +#define ISP_RB_WPTR11 0x62248 +#define ISP_RB_BASE_LO12 0x6224c +#define ISP_RB_BASE_HI12 0x62250 +#define ISP_RB_SIZE12 0x62254 +#define ISP_RB_RPTR12 0x62258 +#define ISP_RB_WPTR12 0x6225c + +#define ISP_POWER_STATUS 0x60000 + +/* ISP_SOFT_RESET */ +#define ISP_SOFT_RESET__CCPU_SOFT_RESET_MASK 0x00000001UL + +/* ISP_CCPU_CNTL */ +#define ISP_CCPU_CNTL__CCPU_HOST_SOFT_RST_MASK 0x00040000UL + +/* ISP_STATUS */ +#define ISP_STATUS__CCPU_REPORT_MASK 0x000000feUL + +/* ISP_SYS_INT0_STATUS */ +#define ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT9_INT_MASK 0x00010000UL +#define ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT10_INT_MASK 0x00040000UL +#define ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT11_INT_MASK 0x00100000UL +#define ISP_SYS_INT0_STATUS__SYS_INT_RINGBUFFER_WPT12_INT_MASK 0x00400000UL + +/* ISP_SYS_INT0_EN */ +#define ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT9_EN_MASK 0x00010000UL +#define ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT10_EN_MASK 0x00040000UL +#define ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT11_EN_MASK 0x00100000UL +#define ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT12_EN_MASK 0x00400000UL + +/* ISP_SYS_INT0_ACK */ +#define ISP_SYS_INT0_ACK__SYS_INT_RINGBUFFER_WPT9_ACK_MASK 0x00010000UL +#define ISP_SYS_INT0_ACK__SYS_INT_RINGBUFFER_WPT10_ACK_MASK 0x00040000UL +#define ISP_SYS_INT0_ACK__SYS_INT_RINGBUFFER_WPT11_ACK_MASK 0x00100000UL +#define 
ISP_SYS_INT0_ACK__SYS_INT_RINGBUFFER_WPT12_ACK_MASK 0x00400000UL + +/* Helper functions for reading isp registers */ +static inline u32 isp4hw_rreg(void __iomem *base, u32 reg) +{ + return readl(base + reg); +} + +/* Helper functions for writing isp registers */ +static inline void isp4hw_wreg(void __iomem *base, u32 reg, u32 val) +{ + return writel(val, base + reg); +} + +#endif /* _ISP4_HW_REG_H_ */ diff --git a/drivers/media/platform/amd/isp4/isp4_interface.c b/drivers/media/platform/amd/isp4/isp4_interface.c new file mode 100644 index 000000000000..c3b19bab7a02 --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_interface.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#include + +#include "isp4_debug.h" +#include "isp4_fw_cmd_resp.h" +#include "isp4_hw_reg.h" +#include "isp4_interface.h" + +#define ISP4IF_FW_RESP_RB_IRQ_EN_MASK \ + (ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT9_EN_MASK | \ + ISP_SYS_INT0_EN__SYS_INT_RINGBUFFER_WPT12_EN_MASK) + +#define ISP4IF_FW_CMD_TIMEOUT (HZ / 2) + +struct isp4if_rb_config { + const char *name; + u32 index; + u32 reg_rptr; + u32 reg_wptr; + u32 reg_base_lo; + u32 reg_base_hi; + u32 reg_size; + u32 val_size; + u64 base_mc_addr; + void *base_sys_addr; +}; + +/* FW cmd ring buffer configuration */ +static struct isp4if_rb_config + isp4if_cmd_rb_config[ISP4IF_STREAM_ID_MAX] = { + { + .name = "CMD_RB_GBL0", + .index = 3, + .reg_rptr = ISP_RB_RPTR4, + .reg_wptr = ISP_RB_WPTR4, + .reg_base_lo = ISP_RB_BASE_LO4, + .reg_base_hi = ISP_RB_BASE_HI4, + .reg_size = ISP_RB_SIZE4, + }, + { + .name = "CMD_RB_STR1", + .index = 0, + .reg_rptr = ISP_RB_RPTR1, + .reg_wptr = ISP_RB_WPTR1, + .reg_base_lo = ISP_RB_BASE_LO1, + .reg_base_hi = ISP_RB_BASE_HI1, + .reg_size = ISP_RB_SIZE1, + }, + { + .name = "CMD_RB_STR2", + .index = 1, + .reg_rptr = ISP_RB_RPTR2, + .reg_wptr = ISP_RB_WPTR2, + .reg_base_lo = ISP_RB_BASE_LO2, + .reg_base_hi = ISP_RB_BASE_HI2, + .reg_size = ISP_RB_SIZE2, + }, + { + .name = "CMD_RB_STR3", + .index = 2, + .reg_rptr = ISP_RB_RPTR3, + .reg_wptr = ISP_RB_WPTR3, + .reg_base_lo = ISP_RB_BASE_LO3, + .reg_base_hi = ISP_RB_BASE_HI3, + .reg_size = ISP_RB_SIZE3, + }, +}; + +/* FW resp ring buffer configuration */ +static struct isp4if_rb_config + isp4if_resp_rb_config[ISP4IF_STREAM_ID_MAX] = { + { + .name = "RES_RB_GBL0", + .index = 3, + .reg_rptr = ISP_RB_RPTR12, + .reg_wptr = ISP_RB_WPTR12, + .reg_base_lo = ISP_RB_BASE_LO12, + .reg_base_hi = ISP_RB_BASE_HI12, + .reg_size = ISP_RB_SIZE12, + }, + { + .name = "RES_RB_STR1", + .index = 0, + .reg_rptr = ISP_RB_RPTR9, + .reg_wptr = ISP_RB_WPTR9, + .reg_base_lo = ISP_RB_BASE_LO9, + .reg_base_hi = ISP_RB_BASE_HI9, + .reg_size = ISP_RB_SIZE9, + }, + { + .name = "RES_RB_STR2", + .index = 1, + .reg_rptr = ISP_RB_RPTR10, + .reg_wptr = ISP_RB_WPTR10, + .reg_base_lo = ISP_RB_BASE_LO10, + .reg_base_hi = ISP_RB_BASE_HI10, + .reg_size = ISP_RB_SIZE10, + }, + { + .name = "RES_RB_STR3", + .index = 2, + .reg_rptr = ISP_RB_RPTR11, + .reg_wptr = ISP_RB_WPTR11, + .reg_base_lo = ISP_RB_BASE_LO11, + .reg_base_hi = ISP_RB_BASE_HI11, + .reg_size = ISP_RB_SIZE11, + }, +}; + +/* FW log ring buffer configuration */ +static struct isp4if_rb_config isp4if_log_rb_config = { + .name = "LOG_RB", + .index = 0, + .reg_rptr = ISP_LOG_RB_RPTR0, + .reg_wptr = ISP_LOG_RB_WPTR0, + .reg_base_lo = ISP_LOG_RB_BASE_LO0, + .reg_base_hi = ISP_LOG_RB_BASE_HI0, + .reg_size = ISP_LOG_RB_SIZE0, +}; + +static struct isp4if_gpu_mem_info *isp4if_gpu_mem_alloc(struct isp4_interface 
*ispif, u32 mem_size) +{ + struct isp4if_gpu_mem_info *mem_info; + struct device *dev = ispif->dev; + int ret; + + mem_info = kmalloc(sizeof(*mem_info), GFP_KERNEL); + if (!mem_info) + return NULL; + + mem_info->mem_size = mem_size; + ret = isp_kernel_buffer_alloc(dev, mem_info->mem_size, &mem_info->mem_handle, + &mem_info->gpu_mc_addr, &mem_info->sys_addr); + if (ret) { + kfree(mem_info); + return NULL; + } + + return mem_info; +} + +static void isp4if_gpu_mem_free(struct isp4_interface *ispif, + struct isp4if_gpu_mem_info **mem_info_ptr) +{ + struct isp4if_gpu_mem_info *mem_info = *mem_info_ptr; + struct device *dev = ispif->dev; + + if (!mem_info) { + dev_err(dev, "invalid mem_info\n"); + return; + } + + *mem_info_ptr = NULL; + isp_kernel_buffer_free(&mem_info->mem_handle, &mem_info->gpu_mc_addr, &mem_info->sys_addr); + kfree(mem_info); +} + +static void isp4if_dealloc_fw_gpumem(struct isp4_interface *ispif) +{ + int i; + + isp4if_gpu_mem_free(ispif, &ispif->fw_mem_pool); + isp4if_gpu_mem_free(ispif, &ispif->fw_cmd_resp_buf); + isp4if_gpu_mem_free(ispif, &ispif->fw_log_buf); + + for (i = 0; i < ISP4IF_MAX_STREAM_BUF_COUNT; i++) + isp4if_gpu_mem_free(ispif, &ispif->meta_info_buf[i]); +} + +static int isp4if_alloc_fw_gpumem(struct isp4_interface *ispif) +{ + struct device *dev = ispif->dev; + int i; + + ispif->fw_mem_pool = isp4if_gpu_mem_alloc(ispif, FW_MEMORY_POOL_SIZE); + if (!ispif->fw_mem_pool) + goto error_no_memory; + + ispif->fw_cmd_resp_buf = + isp4if_gpu_mem_alloc(ispif, ISP4IF_RB_PMBMAP_MEM_SIZE); + if (!ispif->fw_cmd_resp_buf) + goto error_no_memory; + + ispif->fw_log_buf = + isp4if_gpu_mem_alloc(ispif, ISP4IF_FW_LOG_RINGBUF_SIZE); + if (!ispif->fw_log_buf) + goto error_no_memory; + + for (i = 0; i < ISP4IF_MAX_STREAM_BUF_COUNT; i++) { + ispif->meta_info_buf[i] = + isp4if_gpu_mem_alloc(ispif, ISP4IF_META_INFO_BUF_SIZE); + if (!ispif->meta_info_buf[i]) + goto error_no_memory; + } + + return 0; + +error_no_memory: + dev_err(dev, "failed to allocate gpu memory\n"); + return -ENOMEM; +} + +static u32 isp4if_compute_check_sum(const void *buf, size_t buf_size) +{ + const u8 *surplus_ptr; + const u32 *buffer; + u32 checksum = 0; + size_t i; + + buffer = (const u32 *)buf; + for (i = 0; i < buf_size / sizeof(u32); i++) + checksum += buffer[i]; + + surplus_ptr = (const u8 *)&buffer[i]; + /* add surplus data crc checksum */ + for (i = 0; i < buf_size % sizeof(u32); i++) + checksum += surplus_ptr[i]; + + return checksum; +} + +void isp4if_clear_cmdq(struct isp4_interface *ispif) +{ + struct isp4if_cmd_element *buf_node, *tmp_node; + LIST_HEAD(free_list); + + scoped_guard(spinlock, &ispif->cmdq_lock) + list_splice_init(&ispif->cmdq, &free_list); + + list_for_each_entry_safe(buf_node, tmp_node, &free_list, list) + kfree(buf_node); +} + +static bool isp4if_is_cmdq_rb_full(struct isp4_interface *ispif, enum isp4if_stream_id stream) +{ + struct isp4if_rb_config *rb_config = &isp4if_cmd_rb_config[stream]; + u32 rreg = rb_config->reg_rptr, wreg = rb_config->reg_wptr; + u32 len = rb_config->val_size; + u32 rd_ptr, wr_ptr; + u32 bytes_free; + + rd_ptr = isp4hw_rreg(ispif->mmio, rreg); + wr_ptr = isp4hw_rreg(ispif->mmio, wreg); + + /* Read and write pointers are equal, indicating the ringbuf is empty */ + if (wr_ptr == rd_ptr) + return false; + + if (wr_ptr > rd_ptr) + bytes_free = len - (wr_ptr - rd_ptr); + else + bytes_free = rd_ptr - wr_ptr; + + /* + * Ignore one byte from the bytes free to prevent rd_ptr from equaling + * wr_ptr when the ringbuf is full, because rd_ptr == wr_ptr is supposed + * 
to indicate that the ringbuf is empty. + */ + return bytes_free <= sizeof(struct isp4fw_cmd); +} + +struct isp4if_cmd_element *isp4if_rm_cmd_from_cmdq(struct isp4_interface *ispif, u32 seq_num, + u32 cmd_id) +{ + struct isp4if_cmd_element *buf_node; + + guard(spinlock)(&ispif->cmdq_lock); + + list_for_each_entry(buf_node, &ispif->cmdq, list) { + if (buf_node->seq_num == seq_num && buf_node->cmd_id == cmd_id) { + list_del(&buf_node->list); + return buf_node; + } + } + + return NULL; +} + +/* Must check that isp4if_is_cmdq_rb_full() == false before calling */ +static int isp4if_insert_isp_fw_cmd(struct isp4_interface *ispif, enum isp4if_stream_id stream, + const struct isp4fw_cmd *cmd) +{ + struct isp4if_rb_config *rb_config = &isp4if_cmd_rb_config[stream]; + u32 rreg = rb_config->reg_rptr, wreg = rb_config->reg_wptr; + void *mem_sys = rb_config->base_sys_addr; + const u32 cmd_sz = sizeof(*cmd); + struct device *dev = ispif->dev; + u32 len = rb_config->val_size; + const void *src = cmd; + u32 rd_ptr, wr_ptr; + u32 bytes_to_end; + + rd_ptr = isp4hw_rreg(ispif->mmio, rreg); + wr_ptr = isp4hw_rreg(ispif->mmio, wreg); + if (rd_ptr >= len || wr_ptr >= len) { + dev_err(dev, "rb invalid: stream=%u(%s), rd=%u, wr=%u, len=%u, cmd_sz=%u\n", + stream, isp4dbg_get_if_stream_str(stream), rd_ptr, wr_ptr, len, cmd_sz); + return -EINVAL; + } + + bytes_to_end = len - wr_ptr; + if (bytes_to_end >= cmd_sz) { + /* FW cmd is just a straight copy to the write pointer */ + memcpy(mem_sys + wr_ptr, src, cmd_sz); + isp4hw_wreg(ispif->mmio, wreg, (wr_ptr + cmd_sz) % len); + } else { + /* FW cmd is split because the ringbuf needs to wrap around */ + memcpy(mem_sys + wr_ptr, src, bytes_to_end); + memcpy(mem_sys, src + bytes_to_end, cmd_sz - bytes_to_end); + isp4hw_wreg(ispif->mmio, wreg, cmd_sz - bytes_to_end); + } + + return 0; +} + +static inline enum isp4if_stream_id isp4if_get_fw_stream(u32 cmd_id) +{ + return ISP4IF_STREAM_ID_1; +} + +static int isp4if_send_fw_cmd(struct isp4_interface *ispif, u32 cmd_id, const void *package, + u32 package_size, bool sync) +{ + enum isp4if_stream_id stream = isp4if_get_fw_stream(cmd_id); + struct isp4if_cmd_element *ele = NULL; + struct device *dev = ispif->dev; + struct isp4fw_cmd cmd; + u32 seq_num; + int ret; + + if (package_size > sizeof(cmd.cmd_param)) { + dev_err(dev, "fail pkgsize(%u)>%zu cmd:0x%x,stream %d\n", + package_size, sizeof(cmd.cmd_param), cmd_id, stream); + return -EINVAL; + } + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. 
+ */ + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd_id = cmd_id; + switch (stream) { + case ISP4IF_STREAM_ID_GLOBAL: + cmd.cmd_stream_id = STREAM_ID_INVALID; + break; + case ISP4IF_STREAM_ID_1: + cmd.cmd_stream_id = STREAM_ID_1; + break; + default: + dev_err(dev, "fail bad stream id %d\n", stream); + return -EINVAL; + } + + /* Allocate the sync command object early and outside of the lock */ + if (sync) { + ele = kmalloc(sizeof(*ele), GFP_KERNEL); + if (!ele) + return -ENOMEM; + + /* Get two references: one for the resp thread, one for us */ + atomic_set(&ele->refcnt, 2); + init_completion(&ele->cmd_done); + } + + if (package && package_size) + memcpy(cmd.cmd_param, package, package_size); + + scoped_guard(mutex, &ispif->isp4if_mutex) { + ret = read_poll_timeout(isp4if_is_cmdq_rb_full, ret, !ret, ISP4IF_RB_FULL_SLEEP_US, + ISP4IF_RB_FULL_TIMEOUT_US, false, ispif, stream); + if (ret) { + struct isp4if_rb_config *rb_config = &isp4if_resp_rb_config[stream]; + u32 rd_ptr = isp4hw_rreg(ispif->mmio, rb_config->reg_rptr); + u32 wr_ptr = isp4hw_rreg(ispif->mmio, rb_config->reg_wptr); + + dev_err(dev, + "failed to get free cmdq slot, stream %s(%d),rd %u, wr %u\n", + isp4dbg_get_if_stream_str(stream), + stream, rd_ptr, wr_ptr); + ret = -ETIMEDOUT; + goto free_ele; + } + + seq_num = ispif->host2fw_seq_num++; + cmd.cmd_seq_num = seq_num; + cmd.cmd_check_sum = isp4if_compute_check_sum(&cmd, sizeof(cmd) - sizeof(u32)); + + /* + * only append the fw cmd to queue when its response needs to be waited for, + * currently there are only two such commands, disable channel and stop stream + * which are only sent after close camera + */ + if (ele) { + ele->seq_num = seq_num; + ele->cmd_id = cmd_id; + scoped_guard(spinlock, &ispif->cmdq_lock) + list_add_tail(&ele->list, &ispif->cmdq); + } + + ret = isp4if_insert_isp_fw_cmd(ispif, stream, &cmd); + if (ret) { + dev_err(dev, "fail for insert_isp_fw_cmd cmd_id %s(0x%08x)\n", + isp4dbg_get_cmd_str(cmd_id), cmd_id); + goto err_dequeue_ele; + } + } + + if (ele) { + ret = wait_for_completion_timeout(&ele->cmd_done, ISP4IF_FW_CMD_TIMEOUT); + if (!ret) { + ret = -ETIMEDOUT; + goto err_dequeue_ele; + } + + ret = 0; + goto put_ele_ref; + } + + return 0; + +err_dequeue_ele: + /* + * Try to remove the command from the queue. If that fails, then it + * means the response thread is currently using the object, and we need + * to use the refcount to avoid a use-after-free by either side. + */ + if (ele && isp4if_rm_cmd_from_cmdq(ispif, seq_num, cmd_id)) + goto free_ele; + +put_ele_ref: + /* Don't free the command if we didn't put the last reference */ + if (ele && atomic_dec_return(&ele->refcnt)) + ele = NULL; + +free_ele: + kfree(ele); + return ret; +} + +static int isp4if_send_buffer(struct isp4_interface *ispif, struct isp4if_img_buf_info *buf_info) +{ + struct isp4fw_cmd_send_buffer cmd; + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. 
+ */ + memset(&cmd, 0, sizeof(cmd)); + cmd.buffer_type = BUFFER_TYPE_PREVIEW; + cmd.buffer.vmid_space.bit.space = ADDR_SPACE_TYPE_GPU_VA; + isp4if_split_addr64(buf_info->planes[0].mc_addr, + &cmd.buffer.buf_base_a_lo, + &cmd.buffer.buf_base_a_hi); + cmd.buffer.buf_size_a = buf_info->planes[0].len; + + isp4if_split_addr64(buf_info->planes[1].mc_addr, + &cmd.buffer.buf_base_b_lo, + &cmd.buffer.buf_base_b_hi); + cmd.buffer.buf_size_b = buf_info->planes[1].len; + + isp4if_split_addr64(buf_info->planes[2].mc_addr, + &cmd.buffer.buf_base_c_lo, + &cmd.buffer.buf_base_c_hi); + cmd.buffer.buf_size_c = buf_info->planes[2].len; + + return isp4if_send_fw_cmd(ispif, CMD_ID_SEND_BUFFER, &cmd, sizeof(cmd), false); +} + +static void isp4if_init_rb_config(struct isp4_interface *ispif, struct isp4if_rb_config *rb_config) +{ + isp4hw_wreg(ispif->mmio, rb_config->reg_rptr, 0x0); + isp4hw_wreg(ispif->mmio, rb_config->reg_wptr, 0x0); + isp4hw_wreg(ispif->mmio, rb_config->reg_base_lo, rb_config->base_mc_addr); + isp4hw_wreg(ispif->mmio, rb_config->reg_base_hi, rb_config->base_mc_addr >> 32); + isp4hw_wreg(ispif->mmio, rb_config->reg_size, rb_config->val_size); +} + +static int isp4if_fw_init(struct isp4_interface *ispif) +{ + u32 aligned_rb_chunk_size = ISP4IF_RB_PMBMAP_MEM_CHUNK & 0xffffffc0; + struct isp4if_rb_config *rb_config; + u32 offset; + int i; + + /* initialize CMD_RB streams */ + for (i = 0; i < ISP4IF_STREAM_ID_MAX; i++) { + rb_config = (isp4if_cmd_rb_config + i); + offset = aligned_rb_chunk_size * rb_config->index; + + rb_config->val_size = ISP4IF_FW_CMD_BUF_SIZE; + rb_config->base_sys_addr = + ispif->fw_cmd_resp_buf->sys_addr + offset; + rb_config->base_mc_addr = + ispif->fw_cmd_resp_buf->gpu_mc_addr + offset; + + isp4if_init_rb_config(ispif, rb_config); + } + + /* initialize RESP_RB streams */ + for (i = 0; i < ISP4IF_STREAM_ID_MAX; i++) { + rb_config = (isp4if_resp_rb_config + i); + offset = aligned_rb_chunk_size * + (rb_config->index + ISP4IF_RESP_CHAN_TO_RB_OFFSET - 1); + + rb_config->val_size = ISP4IF_FW_CMD_BUF_SIZE; + rb_config->base_sys_addr = + ispif->fw_cmd_resp_buf->sys_addr + offset; + rb_config->base_mc_addr = + ispif->fw_cmd_resp_buf->gpu_mc_addr + offset; + + isp4if_init_rb_config(ispif, rb_config); + } + + /* initialize LOG_RB stream */ + rb_config = &isp4if_log_rb_config; + rb_config->val_size = ISP4IF_FW_LOG_RINGBUF_SIZE; + rb_config->base_mc_addr = ispif->fw_log_buf->gpu_mc_addr; + rb_config->base_sys_addr = ispif->fw_log_buf->sys_addr; + + isp4if_init_rb_config(ispif, rb_config); + + return 0; +} + +static int isp4if_wait_fw_ready(struct isp4_interface *ispif, u32 isp_status_addr) +{ + struct device *dev = ispif->dev; + u32 timeout_ms = 100; + u32 interval_ms = 1; + u32 reg_val; + + /* wait for FW initialize done! 
*/ + if (!read_poll_timeout(isp4hw_rreg, reg_val, reg_val & ISP_STATUS__CCPU_REPORT_MASK, + interval_ms * 1000, timeout_ms * 1000, false, + ispif->mmio, isp_status_addr)) + return 0; + + dev_err(dev, "ISP CCPU FW boot failed\n"); + + return -ETIME; +} + +static void isp4if_enable_ccpu(struct isp4_interface *ispif) +{ + u32 reg_val; + + reg_val = isp4hw_rreg(ispif->mmio, ISP_SOFT_RESET); + reg_val &= (~ISP_SOFT_RESET__CCPU_SOFT_RESET_MASK); + isp4hw_wreg(ispif->mmio, ISP_SOFT_RESET, reg_val); + + usleep_range(100, 150); + + reg_val = isp4hw_rreg(ispif->mmio, ISP_CCPU_CNTL); + reg_val &= (~ISP_CCPU_CNTL__CCPU_HOST_SOFT_RST_MASK); + isp4hw_wreg(ispif->mmio, ISP_CCPU_CNTL, reg_val); +} + +static void isp4if_disable_ccpu(struct isp4_interface *ispif) +{ + u32 reg_val; + + reg_val = isp4hw_rreg(ispif->mmio, ISP_CCPU_CNTL); + reg_val |= ISP_CCPU_CNTL__CCPU_HOST_SOFT_RST_MASK; + isp4hw_wreg(ispif->mmio, ISP_CCPU_CNTL, reg_val); + + usleep_range(100, 150); + + reg_val = isp4hw_rreg(ispif->mmio, ISP_SOFT_RESET); + reg_val |= ISP_SOFT_RESET__CCPU_SOFT_RESET_MASK; + isp4hw_wreg(ispif->mmio, ISP_SOFT_RESET, reg_val); +} + +static int isp4if_fw_boot(struct isp4_interface *ispif) +{ + struct device *dev = ispif->dev; + + if (ispif->status != ISP4IF_STATUS_PWR_ON) { + dev_err(dev, "invalid isp power status %d\n", ispif->status); + return -EINVAL; + } + + isp4if_disable_ccpu(ispif); + + isp4if_fw_init(ispif); + + /* clear ccpu status */ + isp4hw_wreg(ispif->mmio, ISP_STATUS, 0x0); + + isp4if_enable_ccpu(ispif); + + if (isp4if_wait_fw_ready(ispif, ISP_STATUS)) { + isp4if_disable_ccpu(ispif); + return -EINVAL; + } + + /* enable interrupts */ + isp4hw_wreg(ispif->mmio, ISP_SYS_INT0_EN, ISP4IF_FW_RESP_RB_IRQ_EN_MASK); + + ispif->status = ISP4IF_STATUS_FW_RUNNING; + + dev_dbg(dev, "ISP CCPU FW boot success\n"); + + return 0; +} + +int isp4if_f2h_resp(struct isp4_interface *ispif, enum isp4if_stream_id stream, + struct isp4fw_resp *resp) +{ + struct isp4if_rb_config *rb_config = &isp4if_resp_rb_config[stream]; + u32 rreg = rb_config->reg_rptr, wreg = rb_config->reg_wptr; + void *mem_sys = rb_config->base_sys_addr; + const u32 resp_sz = sizeof(*resp); + struct device *dev = ispif->dev; + u32 len = rb_config->val_size; + u32 rd_ptr, wr_ptr; + u32 bytes_to_end; + void *dst = resp; + u32 checksum; + + rd_ptr = isp4hw_rreg(ispif->mmio, rreg); + wr_ptr = isp4hw_rreg(ispif->mmio, wreg); + if (rd_ptr >= len || wr_ptr >= len) + goto err_rb_invalid; + + /* Read and write pointers are equal, indicating the ringbuf is empty */ + if (rd_ptr == wr_ptr) + return -ENODATA; + + bytes_to_end = len - rd_ptr; + if (bytes_to_end >= resp_sz) { + /* FW response is just a straight copy from the read pointer */ + if (wr_ptr > rd_ptr && wr_ptr - rd_ptr < resp_sz) + goto err_rb_invalid; + + memcpy(dst, mem_sys + rd_ptr, resp_sz); + isp4hw_wreg(ispif->mmio, rreg, (rd_ptr + resp_sz) % len); + } else { + /* FW response is split because the ringbuf wrapped around */ + if (wr_ptr > rd_ptr || wr_ptr < resp_sz - bytes_to_end) + goto err_rb_invalid; + + memcpy(dst, mem_sys + rd_ptr, bytes_to_end); + memcpy(dst + bytes_to_end, mem_sys, resp_sz - bytes_to_end); + isp4hw_wreg(ispif->mmio, rreg, resp_sz - bytes_to_end); + } + + checksum = isp4if_compute_check_sum(resp, resp_sz - sizeof(u32)); + if (checksum != resp->resp_check_sum) { + dev_err(dev, "resp checksum 0x%x,should 0x%x,rptr %u,wptr %u\n", + checksum, resp->resp_check_sum, rd_ptr, wr_ptr); + dev_err(dev, "%s(%u), seqNo %u, resp_id %s(0x%x)\n", + isp4dbg_get_if_stream_str(stream), stream, 
resp->resp_seq_num, + isp4dbg_get_resp_str(resp->resp_id), resp->resp_id); + return -EINVAL; + } + + return 0; + +err_rb_invalid: + dev_err(dev, "rb invalid: stream=%u(%s), rd=%u, wr=%u, len=%u, resp_sz=%u\n", + stream, isp4dbg_get_if_stream_str(stream), rd_ptr, wr_ptr, len, resp_sz); + return -EINVAL; +} + +int isp4if_send_command(struct isp4_interface *ispif, u32 cmd_id, const void *package, + u32 package_size) +{ + return isp4if_send_fw_cmd(ispif, cmd_id, package, package_size, false); +} + +int isp4if_send_command_sync(struct isp4_interface *ispif, u32 cmd_id, const void *package, + u32 package_size) +{ + return isp4if_send_fw_cmd(ispif, cmd_id, package, package_size, true); +} + +void isp4if_clear_bufq(struct isp4_interface *ispif) +{ + struct isp4if_img_buf_node *buf_node, *tmp_node; + LIST_HEAD(free_list); + + scoped_guard(spinlock, &ispif->bufq_lock) + list_splice_init(&ispif->bufq, &free_list); + + list_for_each_entry_safe(buf_node, tmp_node, &free_list, node) + kfree(buf_node); +} + +void isp4if_dealloc_buffer_node(struct isp4if_img_buf_node *buf_node) +{ + kfree(buf_node); +} + +struct isp4if_img_buf_node *isp4if_alloc_buffer_node(struct isp4if_img_buf_info *buf_info) +{ + struct isp4if_img_buf_node *node; + + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (node) + node->buf_info = *buf_info; + + return node; +} + +struct isp4if_img_buf_node *isp4if_dequeue_buffer(struct isp4_interface *ispif) +{ + struct isp4if_img_buf_node *buf_node; + + guard(spinlock)(&ispif->bufq_lock); + + buf_node = list_first_entry_or_null(&ispif->bufq, typeof(*buf_node), node); + if (buf_node) + list_del(&buf_node->node); + + return buf_node; +} + +int isp4if_queue_buffer(struct isp4_interface *ispif, struct isp4if_img_buf_node *buf_node) +{ + int ret; + + ret = isp4if_send_buffer(ispif, &buf_node->buf_info); + if (ret) + return ret; + + scoped_guard(spinlock, &ispif->bufq_lock) + list_add_tail(&buf_node->node, &ispif->bufq); + + return 0; +} + +int isp4if_stop(struct isp4_interface *ispif) +{ + isp4if_disable_ccpu(ispif); + + isp4if_dealloc_fw_gpumem(ispif); + + return 0; +} + +int isp4if_start(struct isp4_interface *ispif) +{ + int ret; + + ret = isp4if_alloc_fw_gpumem(ispif); + if (ret) + return ret; + + ret = isp4if_fw_boot(ispif); + if (ret) + goto failed_fw_boot; + + return 0; + +failed_fw_boot: + isp4if_dealloc_fw_gpumem(ispif); + return ret; +} + +int isp4if_deinit(struct isp4_interface *ispif) +{ + isp4if_clear_cmdq(ispif); + + isp4if_clear_bufq(ispif); + + mutex_destroy(&ispif->isp4if_mutex); + + return 0; +} + +int isp4if_init(struct isp4_interface *ispif, struct device *dev, void __iomem *isp_mmio) +{ + ispif->dev = dev; + ispif->mmio = isp_mmio; + + spin_lock_init(&ispif->cmdq_lock); /* used for cmdq access */ + spin_lock_init(&ispif->bufq_lock); /* used for bufq access */ + mutex_init(&ispif->isp4if_mutex); /* used for commands sent to ispfw */ + + INIT_LIST_HEAD(&ispif->cmdq); + INIT_LIST_HEAD(&ispif->bufq); + + return 0; +} diff --git a/drivers/media/platform/amd/isp4/isp4_interface.h b/drivers/media/platform/amd/isp4/isp4_interface.h new file mode 100644 index 000000000000..01d5268f7d4c --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_interface.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. 
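+ *
+ * Host/firmware interface for the ISP4 CCPU: ring buffer geometry,
+ * command/response bookkeeping and the isp4_interface state shared by
+ * the subdevice and video nodes.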
+ */ + +#ifndef _ISP4_INTERFACE_H_ +#define _ISP4_INTERFACE_H_ + +#include +#include +#include +#include + +struct isp4fw_resp; + +#define ISP4IF_RB_MAX 25 +#define ISP4IF_RESP_CHAN_TO_RB_OFFSET 9 +#define ISP4IF_RB_PMBMAP_MEM_SIZE (SZ_16M - 1) +#define ISP4IF_RB_PMBMAP_MEM_CHUNK \ + (ISP4IF_RB_PMBMAP_MEM_SIZE / (ISP4IF_RB_MAX - 1)) +#define ISP4IF_HOST2FW_COMMAND_SIZE sizeof(struct isp4fw_cmd) +#define ISP4IF_MAX_NUM_HOST2FW_COMMAND 40 +#define ISP4IF_FW_CMD_BUF_SIZE \ + (ISP4IF_MAX_NUM_HOST2FW_COMMAND * ISP4IF_HOST2FW_COMMAND_SIZE) +#define ISP4IF_RB_FULL_SLEEP_US (33 * USEC_PER_MSEC) +#define ISP4IF_RB_FULL_TIMEOUT_US (10 * ISP4IF_RB_FULL_SLEEP_US) + +#define ISP4IF_META_INFO_BUF_SIZE ALIGN(sizeof(struct isp4fw_meta_info), 0x8000) +#define ISP4IF_MAX_STREAM_BUF_COUNT 8 + +#define ISP4IF_FW_LOG_RINGBUF_SIZE SZ_2M + +enum isp4if_stream_id { + ISP4IF_STREAM_ID_GLOBAL = 0, + ISP4IF_STREAM_ID_1 = 1, + ISP4IF_STREAM_ID_MAX = 4 +}; + +enum isp4if_status { + ISP4IF_STATUS_PWR_OFF, + ISP4IF_STATUS_PWR_ON, + ISP4IF_STATUS_FW_RUNNING, + ISP4IF_FSM_STATUS_MAX +}; + +struct isp4if_gpu_mem_info { + u64 mem_size; + u64 gpu_mc_addr; + void *sys_addr; + void *mem_handle; +}; + +struct isp4if_img_buf_info { + struct { + void *sys_addr; + u64 mc_addr; + u32 len; + } planes[3]; +}; + +struct isp4if_img_buf_node { + struct list_head node; + struct isp4if_img_buf_info buf_info; +}; + +struct isp4if_cmd_element { + struct list_head list; + u32 seq_num; + u32 cmd_id; + struct completion cmd_done; + atomic_t refcnt; +}; + +struct isp4_interface { + struct device *dev; + void __iomem *mmio; + + spinlock_t cmdq_lock; /* used for cmdq access */ + spinlock_t bufq_lock; /* used for bufq access */ + struct mutex isp4if_mutex; /* used to send fw cmd and read fw log */ + + struct list_head cmdq; /* commands sent to fw */ + struct list_head bufq; /* buffers sent to fw */ + + enum isp4if_status status; + u32 host2fw_seq_num; + + /* ISP fw buffers */ + struct isp4if_gpu_mem_info *fw_log_buf; + struct isp4if_gpu_mem_info *fw_cmd_resp_buf; + struct isp4if_gpu_mem_info *fw_mem_pool; + struct isp4if_gpu_mem_info *meta_info_buf[ISP4IF_MAX_STREAM_BUF_COUNT]; +}; + +static inline void isp4if_split_addr64(u64 addr, u32 *lo, u32 *hi) +{ + if (lo) + *lo = addr & 0xffffffff; + + if (hi) + *hi = addr >> 32; +} + +static inline u64 isp4if_join_addr64(u32 lo, u32 hi) +{ + return (((u64)hi) << 32) | (u64)lo; +} + +int isp4if_f2h_resp(struct isp4_interface *ispif, enum isp4if_stream_id stream, + struct isp4fw_resp *resp); + +int isp4if_send_command(struct isp4_interface *ispif, u32 cmd_id, const void *package, + u32 package_size); + +int isp4if_send_command_sync(struct isp4_interface *ispif, u32 cmd_id, const void *package, + u32 package_size); + +struct isp4if_cmd_element *isp4if_rm_cmd_from_cmdq(struct isp4_interface *ispif, u32 seq_num, + u32 cmd_id); + +void isp4if_clear_cmdq(struct isp4_interface *ispif); + +void isp4if_clear_bufq(struct isp4_interface *ispif); + +void isp4if_dealloc_buffer_node(struct isp4if_img_buf_node *buf_node); + +struct isp4if_img_buf_node *isp4if_alloc_buffer_node(struct isp4if_img_buf_info *buf_info); + +struct isp4if_img_buf_node *isp4if_dequeue_buffer(struct isp4_interface *ispif); + +int isp4if_queue_buffer(struct isp4_interface *ispif, struct isp4if_img_buf_node *buf_node); + +int isp4if_stop(struct isp4_interface *ispif); + +int isp4if_start(struct isp4_interface *ispif); + +int isp4if_deinit(struct isp4_interface *ispif); + +int isp4if_init(struct isp4_interface *ispif, struct device *dev, void 
__iomem *isp_mmio); + +#endif /* _ISP4_INTERFACE_H_ */ diff --git a/drivers/media/platform/amd/isp4/isp4_subdev.c b/drivers/media/platform/amd/isp4/isp4_subdev.c new file mode 100644 index 000000000000..21de6bc7fce0 --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_subdev.c @@ -0,0 +1,1057 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#include +#include + +#include "isp4_debug.h" +#include "isp4_fw_cmd_resp.h" +#include "isp4_interface.h" +#include "isp4.h" + +#define ISP4SD_MIN_BUF_CNT_BEF_START_STREAM 4 + +#define ISP4SD_PERFORMANCE_STATE_LOW 0 +#define ISP4SD_PERFORMANCE_STATE_HIGH 1 + +/* align 32KB */ +#define ISP4SD_META_BUF_SIZE ALIGN(sizeof(struct isp4fw_meta_info), 0x8000) + +#define to_isp4_subdev(v4l2_sdev) \ + container_of(v4l2_sdev, struct isp4_subdev, sdev) + +static const char *isp4sd_entity_name = "amd isp4"; + +static const char *isp4sd_thread_name[ISP4SD_MAX_FW_RESP_STREAM_NUM] = { + "amd_isp4_thread_global", + "amd_isp4_thread_stream1", +}; + +static void isp4sd_module_enable(struct isp4_subdev *isp_subdev, bool enable) +{ + if (isp_subdev->enable_gpio) { + gpiod_set_value(isp_subdev->enable_gpio, enable ? 1 : 0); + dev_dbg(isp_subdev->dev, "%s isp_subdev module\n", + enable ? "enable" : "disable"); + } +} + +static int isp4sd_setup_fw_mem_pool(struct isp4_subdev *isp_subdev) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4fw_cmd_send_buffer buf_type; + struct device *dev = isp_subdev->dev; + int ret; + + if (!ispif->fw_mem_pool) { + dev_err(dev, "fail to alloc mem pool\n"); + return -ENOMEM; + } + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. + */ + memset(&buf_type, 0, sizeof(buf_type)); + buf_type.buffer_type = BUFFER_TYPE_MEM_POOL; + buf_type.buffer.vmid_space.bit.space = ADDR_SPACE_TYPE_GPU_VA; + isp4if_split_addr64(ispif->fw_mem_pool->gpu_mc_addr, + &buf_type.buffer.buf_base_a_lo, + &buf_type.buffer.buf_base_a_hi); + buf_type.buffer.buf_size_a = ispif->fw_mem_pool->mem_size; + + ret = isp4if_send_command(ispif, CMD_ID_SEND_BUFFER, + &buf_type, sizeof(buf_type)); + if (ret) { + dev_err(dev, "send fw mem pool 0x%llx(%u) fail %d\n", + ispif->fw_mem_pool->gpu_mc_addr, buf_type.buffer.buf_size_a, ret); + return ret; + } + + dev_dbg(dev, "send fw mem pool 0x%llx(%u) suc\n", + ispif->fw_mem_pool->gpu_mc_addr, buf_type.buffer.buf_size_a); + + return 0; +} + +static int isp4sd_set_stream_path(struct isp4_subdev *isp_subdev) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4fw_cmd_set_stream_cfg cmd; + struct device *dev = isp_subdev->dev; + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. 
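+ * (For example, "struct isp4fw_cmd_set_stream_cfg cmd = { 0 };" zeroes
+ * all members, but padding bytes may stay indeterminate, so the raw
+ * bytes handed to the FW would not be reproducible.)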
+ */ + memset(&cmd, 0, sizeof(cmd)); + cmd.stream_cfg.mipi_pipe_path_cfg.isp4fw_sensor_id = SENSOR_ID_ON_MIPI0; + cmd.stream_cfg.mipi_pipe_path_cfg.b_enable = true; + cmd.stream_cfg.isp_pipe_path_cfg.isp_pipe_id = MIPI0_ISP_PIPELINE_ID; + + cmd.stream_cfg.b_enable_tnr = true; + dev_dbg(dev, "isp4fw_sensor_id %d, pipeId 0x%x EnableTnr %u\n", + cmd.stream_cfg.mipi_pipe_path_cfg.isp4fw_sensor_id, + cmd.stream_cfg.isp_pipe_path_cfg.isp_pipe_id, + cmd.stream_cfg.b_enable_tnr); + + return isp4if_send_command(ispif, CMD_ID_SET_STREAM_CONFIG, + &cmd, sizeof(cmd)); +} + +static int isp4sd_send_meta_buf(struct isp4_subdev *isp_subdev) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4fw_cmd_send_buffer buf_type; + struct device *dev = isp_subdev->dev; + int i; + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. + */ + memset(&buf_type, 0, sizeof(buf_type)); + for (i = 0; i < ISP4IF_MAX_STREAM_BUF_COUNT; i++) { + struct isp4if_gpu_mem_info *meta_info_buf = + isp_subdev->ispif.meta_info_buf[i]; + int ret; + + if (!meta_info_buf) { + dev_err(dev, "fail for no meta info buf(%u)\n", i); + return -ENOMEM; + } + + buf_type.buffer_type = BUFFER_TYPE_META_INFO; + buf_type.buffer.vmid_space.bit.space = ADDR_SPACE_TYPE_GPU_VA; + isp4if_split_addr64(meta_info_buf->gpu_mc_addr, + &buf_type.buffer.buf_base_a_lo, + &buf_type.buffer.buf_base_a_hi); + buf_type.buffer.buf_size_a = meta_info_buf->mem_size; + ret = isp4if_send_command(ispif, CMD_ID_SEND_BUFFER, + &buf_type, sizeof(buf_type)); + if (ret) { + dev_err(dev, "send meta info(%u) fail\n", i); + return ret; + } + } + + dev_dbg(dev, "send meta info suc\n"); + return 0; +} + +static bool isp4sd_get_str_out_prop(struct isp4_subdev *isp_subdev, + struct isp4fw_image_prop *out_prop, + struct v4l2_subdev_state *state, u32 pad) +{ + struct device *dev = isp_subdev->dev; + struct v4l2_mbus_framefmt *format; + + format = v4l2_subdev_state_get_format(state, pad, 0); + if (!format) { + dev_err(dev, "fail get subdev state format\n"); + return false; + } + + switch (format->code) { + case MEDIA_BUS_FMT_YUYV8_1_5X8: + out_prop->image_format = IMAGE_FORMAT_NV12; + out_prop->width = format->width; + out_prop->height = format->height; + out_prop->luma_pitch = format->width; + out_prop->chroma_pitch = out_prop->width; + break; + case MEDIA_BUS_FMT_YUYV8_1X16: + out_prop->image_format = IMAGE_FORMAT_YUV422INTERLEAVED; + out_prop->width = format->width; + out_prop->height = format->height; + out_prop->luma_pitch = format->width * 2; + out_prop->chroma_pitch = 0; + break; + default: + dev_err(dev, "fail for bad image format:0x%x\n", + format->code); + return false; + } + + if (!out_prop->width || !out_prop->height) + return false; + + return true; +} + +static int isp4sd_kickoff_stream(struct isp4_subdev *isp_subdev, u32 w, u32 h) +{ + struct isp4sd_sensor_info *sensor_info = &isp_subdev->sensor_info; + struct isp4_interface *ispif = &isp_subdev->ispif; + struct device *dev = isp_subdev->dev; + + if (sensor_info->status == ISP4SD_START_STATUS_STARTED) + return 0; + + if (sensor_info->status == ISP4SD_START_STATUS_START_FAIL) { + dev_err(dev, "fail for previous start fail\n"); + return -EINVAL; + } + + dev_dbg(dev, "w:%u,h:%u\n", w, h); + + if (isp4sd_send_meta_buf(isp_subdev)) { + dev_err(dev, "fail to send meta buf\n"); + sensor_info->status = ISP4SD_START_STATUS_START_FAIL; + return -EINVAL; + } + + sensor_info->status = ISP4SD_START_STATUS_OFF; + + if 
(!sensor_info->start_stream_cmd_sent && + sensor_info->buf_sent_cnt >= ISP4SD_MIN_BUF_CNT_BEF_START_STREAM) { + int ret = isp4if_send_command(ispif, CMD_ID_START_STREAM, + NULL, 0); + if (ret) { + dev_err(dev, "fail to start stream\n"); + return ret; + } + + sensor_info->start_stream_cmd_sent = true; + } else { + dev_dbg(dev, + "no send START_STREAM, start_sent %u, buf_sent %u\n", + sensor_info->start_stream_cmd_sent, + sensor_info->buf_sent_cnt); + } + + return 0; +} + +static int isp4sd_setup_output(struct isp4_subdev *isp_subdev, + struct v4l2_subdev_state *state, u32 pad) +{ + struct isp4sd_output_info *output_info = &isp_subdev->sensor_info.output_info; + struct isp4sd_sensor_info *sensor_info = &isp_subdev->sensor_info; + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4fw_cmd_set_out_ch_prop cmd_ch_prop; + struct isp4fw_cmd_enable_out_ch cmd_ch_en; + struct device *dev = isp_subdev->dev; + int ret; + + if (output_info->start_status == ISP4SD_START_STATUS_STARTED) + return 0; + + if (output_info->start_status == ISP4SD_START_STATUS_START_FAIL) { + dev_err(dev, "fail for previous start fail\n"); + return -EINVAL; + } + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. + */ + memset(&cmd_ch_prop, 0, sizeof(cmd_ch_prop)); + cmd_ch_prop.ch = ISP_PIPE_OUT_CH_PREVIEW; + + if (!isp4sd_get_str_out_prop(isp_subdev, &cmd_ch_prop.image_prop, state, pad)) { + dev_err(dev, "fail to get out prop\n"); + return -EINVAL; + } + + dev_dbg(dev, "channel:%s,fmt %s,w:h=%u:%u,lp:%u,cp%u\n", + isp4dbg_get_out_ch_str(cmd_ch_prop.ch), + isp4dbg_get_img_fmt_str(cmd_ch_prop.image_prop.image_format), + cmd_ch_prop.image_prop.width, cmd_ch_prop.image_prop.height, + cmd_ch_prop.image_prop.luma_pitch, + cmd_ch_prop.image_prop.chroma_pitch); + + ret = isp4if_send_command(ispif, CMD_ID_SET_OUT_CHAN_PROP, + &cmd_ch_prop, sizeof(cmd_ch_prop)); + if (ret) { + output_info->start_status = ISP4SD_START_STATUS_START_FAIL; + dev_err(dev, "fail to set out prop\n"); + return ret; + } + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. + */ + memset(&cmd_ch_en, 0, sizeof(cmd_ch_en)); + cmd_ch_en.ch = ISP_PIPE_OUT_CH_PREVIEW; + cmd_ch_en.is_enable = true; + ret = isp4if_send_command(ispif, CMD_ID_ENABLE_OUT_CHAN, + &cmd_ch_en, sizeof(cmd_ch_en)); + if (ret) { + output_info->start_status = ISP4SD_START_STATUS_START_FAIL; + dev_err(dev, "fail to enable channel\n"); + return ret; + } + + dev_dbg(dev, "enable channel %s\n", isp4dbg_get_out_ch_str(cmd_ch_en.ch)); + + if (!sensor_info->start_stream_cmd_sent) { + ret = isp4sd_kickoff_stream(isp_subdev, + cmd_ch_prop.image_prop.width, + cmd_ch_prop.image_prop.height); + if (ret) { + dev_err(dev, "kickoff stream fail %d\n", ret); + return ret; + } + /* + * sensor_info->start_stream_cmd_sent will be set to true + * 1. in isp4sd_kickoff_stream, if app first send buffer then + * start stream + * 2. 
in isp_set_stream_buf, if app first start stream, then + * send buffer + * because ISP FW has the requirement, host needs to send buffer + * before send start stream cmd + */ + if (sensor_info->start_stream_cmd_sent) { + sensor_info->status = ISP4SD_START_STATUS_STARTED; + output_info->start_status = ISP4SD_START_STATUS_STARTED; + dev_dbg(dev, "kickoff stream suc,start cmd sent\n"); + } + } else { + dev_dbg(dev, "stream running, no need kickoff\n"); + output_info->start_status = ISP4SD_START_STATUS_STARTED; + } + + dev_dbg(dev, "setup output suc\n"); + return 0; +} + +static int isp4sd_init_stream(struct isp4_subdev *isp_subdev) +{ + struct device *dev = isp_subdev->dev; + int ret; + + ret = isp4sd_setup_fw_mem_pool(isp_subdev); + if (ret) { + dev_err(dev, "fail to setup fw mem pool\n"); + return ret; + } + + ret = isp4sd_set_stream_path(isp_subdev); + if (ret) { + dev_err(dev, "fail to setup stream path\n"); + return ret; + } + + return 0; +} + +static void isp4sd_uninit_stream(struct isp4_subdev *isp_subdev, + struct v4l2_subdev_state *state, u32 pad) +{ + struct isp4sd_sensor_info *sensor_info = &isp_subdev->sensor_info; + struct isp4sd_output_info *output_info = &sensor_info->output_info; + struct isp4_interface *ispif = &isp_subdev->ispif; + struct v4l2_mbus_framefmt *format; + + format = v4l2_subdev_state_get_format(state, pad, 0); + if (!format) { + dev_err(isp_subdev->dev, "fail to get v4l2 format\n"); + } else { + memset(format, 0, sizeof(*format)); + format->code = MEDIA_BUS_FMT_YUYV8_1_5X8; + } + + isp4if_clear_bufq(ispif); + isp4if_clear_cmdq(ispif); + + sensor_info->start_stream_cmd_sent = false; + sensor_info->buf_sent_cnt = 0; + + sensor_info->status = ISP4SD_START_STATUS_OFF; + output_info->start_status = ISP4SD_START_STATUS_OFF; +} + +static void isp4sd_fw_resp_cmd_done(struct isp4_subdev *isp_subdev, + enum isp4if_stream_id stream_id, + struct isp4fw_resp_cmd_done *para) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4if_cmd_element *ele = + isp4if_rm_cmd_from_cmdq(ispif, para->cmd_seq_num, para->cmd_id); + struct device *dev = isp_subdev->dev; + + dev_dbg(dev, "stream %d,cmd %s(0x%08x)(%d),seq %u, ele %p\n", + stream_id, + isp4dbg_get_cmd_str(para->cmd_id), + para->cmd_id, para->cmd_status, para->cmd_seq_num, + ele); + + if (ele) { + complete(&ele->cmd_done); + if (atomic_dec_and_test(&ele->refcnt)) + kfree(ele); + } +} + +static struct isp4fw_meta_info *isp4sd_get_meta_by_mc(struct isp4_subdev *isp_subdev, + u64 mc) +{ + for (int i = 0; i < ISP4IF_MAX_STREAM_BUF_COUNT; i++) { + struct isp4if_gpu_mem_info *meta_info_buf = + isp_subdev->ispif.meta_info_buf[i]; + + if (meta_info_buf->gpu_mc_addr == mc) + return meta_info_buf->sys_addr; + } + + return NULL; +} + +static void isp4sd_send_meta_info(struct isp4_subdev *isp_subdev, + u64 meta_info_mc) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4fw_cmd_send_buffer buf_type; + struct device *dev = isp_subdev->dev; + + if (isp_subdev->sensor_info.status != ISP4SD_START_STATUS_STARTED) { + dev_warn(dev, "not working status %i, meta_info 0x%llx\n", + isp_subdev->sensor_info.status, meta_info_mc); + return; + } + + /* + * The struct will be shared with ISP FW, use memset() to guarantee padding bits are + * zeroed, since this is not guaranteed on all compilers. 
+ */ + memset(&buf_type, 0, sizeof(buf_type)); + buf_type.buffer_type = BUFFER_TYPE_META_INFO; + buf_type.buffer.vmid_space.bit.space = ADDR_SPACE_TYPE_GPU_VA; + isp4if_split_addr64(meta_info_mc, + &buf_type.buffer.buf_base_a_lo, + &buf_type.buffer.buf_base_a_hi); + buf_type.buffer.buf_size_a = ISP4SD_META_BUF_SIZE; + + if (isp4if_send_command(ispif, CMD_ID_SEND_BUFFER, + &buf_type, sizeof(buf_type))) + dev_err(dev, "fail send meta_info 0x%llx\n", + meta_info_mc); + else + dev_dbg(dev, "resend meta_info 0x%llx\n", meta_info_mc); +} + +static void isp4sd_fw_resp_frame_done(struct isp4_subdev *isp_subdev, + enum isp4if_stream_id stream_id, + struct isp4fw_resp_param_package *para) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct device *dev = isp_subdev->dev; + struct isp4if_img_buf_node *prev; + struct isp4fw_meta_info *meta; + u64 mc; + + mc = isp4if_join_addr64(para->package_addr_lo, para->package_addr_hi); + meta = isp4sd_get_meta_by_mc(isp_subdev, mc); + if (!meta) { + dev_err(dev, "fail to get meta from mc %llx\n", mc); + return; + } + + dev_dbg(dev, "ts:%llu,streamId:%d,poc:%u,preview_en:%u,%s(%i)\n", + ktime_get_ns(), stream_id, meta->poc, + meta->preview.enabled, + isp4dbg_get_buf_done_str(meta->preview.status), + meta->preview.status); + + if (meta->preview.enabled && + (meta->preview.status == BUFFER_STATUS_SKIPPED || + meta->preview.status == BUFFER_STATUS_DONE || + meta->preview.status == BUFFER_STATUS_DIRTY)) { + prev = isp4if_dequeue_buffer(ispif); + if (prev) { + isp4dbg_show_bufmeta_info(dev, "prev", &meta->preview, + &prev->buf_info); + isp4vid_handle_frame_done(&isp_subdev->isp_vdev, + &prev->buf_info); + isp4if_dealloc_buffer_node(prev); + } else { + dev_err(dev, "fail null prev buf\n"); + } + } else if (meta->preview.enabled) { + dev_err(dev, "fail bad preview status %u(%s)\n", + meta->preview.status, + isp4dbg_get_buf_done_str(meta->preview.status)); + } + + if (isp_subdev->sensor_info.status == ISP4SD_START_STATUS_STARTED) + isp4sd_send_meta_info(isp_subdev, mc); + + dev_dbg(dev, "stream_id:%d, status:%d\n", stream_id, + isp_subdev->sensor_info.status); +} + +static void isp4sd_fw_resp_func(struct isp4_subdev *isp_subdev, + enum isp4if_stream_id stream_id) +{ + struct isp4_interface *ispif = &isp_subdev->ispif; + struct device *dev = isp_subdev->dev; + struct isp4fw_resp resp; + + if (stream_id == ISP4IF_STREAM_ID_1) + isp_fw_log_print(isp_subdev); + + while (true) { + if (isp4if_f2h_resp(ispif, stream_id, &resp)) { + /* Re-enable the interrupt */ + isp4_intr_enable(isp_subdev, stream_id, true); + /* + * Recheck to see if there is a new response. + * To ensure that an in-flight interrupt is not lost, + * enabling the interrupt must occur _before_ checking + * for a new response, hence a memory barrier is needed. + * Disable the interrupt again if there was a new response. 
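+ *
+ * In other words: (1) re-enable the interrupt, (2) mb(), (3) poll the
+ * response ring once more. Without the barrier the recheck could be
+ * reordered before the enable, and a response arriving in that window
+ * would neither be seen here nor raise an interrupt.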
+ */
+ mb();
+ if (likely(isp4if_f2h_resp(ispif, stream_id, &resp)))
+ break;
+
+ isp4_intr_enable(isp_subdev, stream_id, false);
+ }
+
+ switch (resp.resp_id) {
+ case RESP_ID_CMD_DONE:
+ isp4sd_fw_resp_cmd_done(isp_subdev, stream_id,
+ &resp.param.cmd_done);
+ break;
+ case RESP_ID_NOTI_FRAME_DONE:
+ isp4sd_fw_resp_frame_done(isp_subdev, stream_id,
+ &resp.param.frame_done);
+ break;
+ default:
+ dev_err(dev, "fail respid %s(0x%x)\n",
+ isp4dbg_get_resp_str(resp.resp_id),
+ resp.resp_id);
+ break;
+ }
+ }
+}
+
+static s32 isp4sd_fw_resp_thread(void *context)
+{
+ struct isp4_subdev_thread_param *para = context;
+ struct isp4_subdev *isp_subdev = para->isp_subdev;
+ struct isp4sd_thread_handler *thread_ctx =
+ &isp_subdev->fw_resp_thread[para->idx];
+ struct device *dev = isp_subdev->dev;
+
+ dev_dbg(dev, "[%u] fw resp thread started\n", para->idx);
+ while (true) {
+ wait_event_interruptible(thread_ctx->waitq, thread_ctx->resp_ready);
+ thread_ctx->resp_ready = false;
+
+ if (kthread_should_stop()) {
+ dev_dbg(dev, "[%u] fw resp thread quit\n", para->idx);
+ break;
+ }
+
+ isp4sd_fw_resp_func(isp_subdev, para->idx);
+ }
+
+ return 0;
+}
+
+static int isp4sd_stop_resp_proc_threads(struct isp4_subdev *isp_subdev)
+{
+ int i;
+
+ for (i = 0; i < ISP4SD_MAX_FW_RESP_STREAM_NUM; i++) {
+ struct isp4sd_thread_handler *thread_ctx =
+ &isp_subdev->fw_resp_thread[i];
+
+ if (thread_ctx->thread) {
+ kthread_stop(thread_ctx->thread);
+ thread_ctx->thread = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static int isp4sd_start_resp_proc_threads(struct isp4_subdev *isp_subdev)
+{
+ struct device *dev = isp_subdev->dev;
+ int i;
+
+ for (i = 0; i < ISP4SD_MAX_FW_RESP_STREAM_NUM; i++) {
+ struct isp4sd_thread_handler *thread_ctx = &isp_subdev->fw_resp_thread[i];
+
+ isp_subdev->isp_resp_para[i].idx = i;
+ isp_subdev->isp_resp_para[i].isp_subdev = isp_subdev;
+ init_waitqueue_head(&thread_ctx->waitq);
+ thread_ctx->resp_ready = false;
+
+ thread_ctx->thread = kthread_run(isp4sd_fw_resp_thread,
+ &isp_subdev->isp_resp_para[i],
+ isp4sd_thread_name[i]);
+ if (IS_ERR(thread_ctx->thread)) {
+ dev_err(dev, "create thread [%d] fail\n", i);
+ thread_ctx->thread = NULL;
+ isp4sd_stop_resp_proc_threads(isp_subdev);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int isp4sd_pwroff_and_deinit(struct isp4_subdev *isp_subdev)
+{
+ struct isp4sd_sensor_info *sensor_info = &isp_subdev->sensor_info;
+ unsigned int perf_state = ISP4SD_PERFORMANCE_STATE_LOW;
+ struct isp4_interface *ispif = &isp_subdev->ispif;
+ struct device *dev = isp_subdev->dev;
+ int ret;
+
+ if (sensor_info->status == ISP4SD_START_STATUS_STARTED) {
+ dev_err(dev, "fail for stream still running\n");
+ return -EINVAL;
+ }
+
+ sensor_info->status = ISP4SD_START_STATUS_OFF;
+
+ if (isp_subdev->irq_enabled) {
+ for (int i = 0; i < ISP4SD_MAX_FW_RESP_STREAM_NUM; i++)
+ disable_irq(isp_subdev->irq[i]);
+ isp_subdev->irq_enabled = false;
+ }
+
+ isp4sd_stop_resp_proc_threads(isp_subdev);
+ dev_dbg(dev, "isp_subdev stop resp proc threads suc");
+
+ isp4if_stop(ispif);
+
+ ret = dev_pm_genpd_set_performance_state(dev, perf_state);
+ if (ret)
+ dev_err(dev, "fail to set isp_subdev performance state %u, ret %d\n",
+ perf_state, ret);
+
+ /* hold ccpu reset */
+ isp4hw_wreg(isp_subdev->mmio, ISP_SOFT_RESET, 0);
+ isp4hw_wreg(isp_subdev->mmio, ISP_POWER_STATUS, 0);
+ ret = pm_runtime_put_sync(dev);
+ if (ret)
+ dev_err(dev, "power off isp_subdev fail %d\n", ret);
+ else
+ dev_dbg(dev, "power off isp_subdev suc\n");
+
+ ispif->status = ISP4IF_STATUS_PWR_OFF;
+
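+ /*
+ * With the CCPU held in reset by isp4if_stop() above, commands still
+ * queued toward the FW can never complete; drop them so no waiter is
+ * left behind.
+ */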
+ isp4if_clear_cmdq(ispif);
+ isp4sd_module_enable(isp_subdev, false);
+
+ /*
+ * When opening the camera, isp4sd_module_enable(isp_subdev, true) is called.
+ * Hardware requires at least a 20ms delay between disabling and enabling the module,
+ * so a sleep is added to ensure ISP stability during quick reopen scenarios.
+ */
+ msleep(20);
+
+ return 0;
+}
+
+static int isp4sd_pwron_and_init(struct isp4_subdev *isp_subdev)
+{
+ struct isp4_interface *ispif = &isp_subdev->ispif;
+ struct device *dev = isp_subdev->dev;
+ int ret;
+
+ if (ispif->status == ISP4IF_STATUS_FW_RUNNING) {
+ dev_dbg(dev, "camera already opened, do nothing\n");
+ return 0;
+ }
+
+ isp4sd_module_enable(isp_subdev, true);
+
+ if (ispif->status < ISP4IF_STATUS_PWR_ON) {
+ unsigned int perf_state = ISP4SD_PERFORMANCE_STATE_HIGH;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ dev_err(dev, "fail to power on isp_subdev ret %d\n",
+ ret);
+ goto err_deinit;
+ }
+
+ /* ISPPG ISP Power Status */
+ isp4hw_wreg(isp_subdev->mmio, ISP_POWER_STATUS, 0x7FF);
+ ret = dev_pm_genpd_set_performance_state(dev, perf_state);
+ if (ret) {
+ dev_err(dev,
+ "fail to set performance state %u, ret %d\n",
+ perf_state, ret);
+ goto err_deinit;
+ }
+
+ ispif->status = ISP4IF_STATUS_PWR_ON;
+ }
+
+ isp_subdev->sensor_info.start_stream_cmd_sent = false;
+ isp_subdev->sensor_info.buf_sent_cnt = 0;
+
+ ret = isp4if_start(ispif);
+ if (ret) {
+ dev_err(dev, "fail to start isp_subdev interface\n");
+ goto err_deinit;
+ }
+
+ if (isp4sd_start_resp_proc_threads(isp_subdev)) {
+ dev_err(dev, "isp4sd_start_resp_proc_threads fail");
+ goto err_deinit;
+ }
+
+ dev_dbg(dev, "create resp threads ok");
+
+ for (int i = 0; i < ISP4SD_MAX_FW_RESP_STREAM_NUM; i++)
+ enable_irq(isp_subdev->irq[i]);
+ isp_subdev->irq_enabled = true;
+
+ return 0;
+err_deinit:
+ isp4sd_pwroff_and_deinit(isp_subdev);
+ return -EINVAL;
+}
+
+static int isp4sd_stop_stream(struct isp4_subdev *isp_subdev,
+ struct v4l2_subdev_state *state, u32 pad)
+{
+ struct isp4sd_sensor_info *sensor_info = &isp_subdev->sensor_info;
+ struct isp4sd_output_info *output_info = &sensor_info->output_info;
+ struct isp4_interface *ispif = &isp_subdev->ispif;
+ struct device *dev = isp_subdev->dev;
+
+ guard(mutex)(&isp_subdev->ops_mutex);
+ dev_dbg(dev, "status %i\n", output_info->start_status);
+
+ if (output_info->start_status == ISP4SD_START_STATUS_STARTED) {
+ struct isp4fw_cmd_enable_out_ch cmd_ch_disable;
+ int ret;
+
+ /*
+ * The struct will be shared with ISP FW, use memset() to guarantee
+ * padding bits are zeroed, since this is not guaranteed on all compilers.
+ */
+ memset(&cmd_ch_disable, 0, sizeof(cmd_ch_disable));
+ cmd_ch_disable.ch = ISP_PIPE_OUT_CH_PREVIEW;
+ /* `cmd_ch_disable.is_enable` is already false */
+ ret = isp4if_send_command_sync(ispif, CMD_ID_ENABLE_OUT_CHAN,
+ &cmd_ch_disable,
+ sizeof(cmd_ch_disable));
+ if (ret)
+ dev_err(dev, "fail to disable stream\n");
+ else
+ dev_dbg(dev, "wait disable stream suc\n");
+
+ ret = isp4if_send_command_sync(ispif, CMD_ID_STOP_STREAM,
+ NULL, 0);
+ if (ret)
+ dev_err(dev, "fail to stop stream\n");
+ else
+ dev_dbg(dev, "wait stop stream suc\n");
+ }
+
+ isp4sd_uninit_stream(isp_subdev, state, pad);
+
+ /*
+ * Return success to ensure the stop process proceeds,
+ * and disregard any errors since they are not fatal.
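+ * isp4sd_uninit_stream() above has already cleared the buffer and
+ * command queues and reset the stream state, so there is nothing
+ * left for the caller to undo.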
+ */ + return 0; +} + +static int isp4sd_start_stream(struct isp4_subdev *isp_subdev, + struct v4l2_subdev_state *state, u32 pad) +{ + struct isp4sd_output_info *output_info = + &isp_subdev->sensor_info.output_info; + struct isp4_interface *ispif = &isp_subdev->ispif; + struct device *dev = isp_subdev->dev; + int ret; + + guard(mutex)(&isp_subdev->ops_mutex); + + if (ispif->status != ISP4IF_STATUS_FW_RUNNING) { + dev_err(dev, "fail, bad fsm %d", ispif->status); + return -EINVAL; + } + + switch (output_info->start_status) { + case ISP4SD_START_STATUS_OFF: + break; + case ISP4SD_START_STATUS_STARTED: + dev_dbg(dev, "stream already started, do nothing\n"); + return 0; + case ISP4SD_START_STATUS_START_FAIL: + dev_err(dev, "stream previously failed to start\n"); + return -EINVAL; + } + + ret = isp4sd_init_stream(isp_subdev); + if (ret) { + dev_err(dev, "fail to init isp_subdev stream\n"); + goto err_stop_stream; + } + + ret = isp4sd_setup_output(isp_subdev, state, pad); + if (ret) { + dev_err(dev, "fail to setup output\n"); + goto err_stop_stream; + } + + return 0; + +err_stop_stream: + isp4sd_stop_stream(isp_subdev, state, pad); + return ret; +} + +static int isp4sd_ioc_send_img_buf(struct v4l2_subdev *sd, + struct isp4if_img_buf_info *buf_info) +{ + struct isp4_subdev *isp_subdev = to_isp4_subdev(sd); + struct isp4_interface *ispif = &isp_subdev->ispif; + struct isp4if_img_buf_node *buf_node; + struct device *dev = isp_subdev->dev; + int ret; + + guard(mutex)(&isp_subdev->ops_mutex); + + if (ispif->status != ISP4IF_STATUS_FW_RUNNING) { + dev_err(dev, "fail send img buf for bad fsm %d\n", + ispif->status); + return -EINVAL; + } + + buf_node = isp4if_alloc_buffer_node(buf_info); + if (!buf_node) { + dev_err(dev, "fail alloc sys img buf info node\n"); + return -ENOMEM; + } + + ret = isp4if_queue_buffer(ispif, buf_node); + if (ret) { + dev_err(dev, "fail to queue image buf, %d\n", ret); + goto error_release_buf_node; + } + + if (!isp_subdev->sensor_info.start_stream_cmd_sent) { + isp_subdev->sensor_info.buf_sent_cnt++; + + if (isp_subdev->sensor_info.buf_sent_cnt >= + ISP4SD_MIN_BUF_CNT_BEF_START_STREAM) { + ret = isp4if_send_command(ispif, CMD_ID_START_STREAM, + NULL, 0); + if (ret) { + dev_err(dev, "fail to START_STREAM"); + goto error_release_buf_node; + } + isp_subdev->sensor_info.start_stream_cmd_sent = true; + isp_subdev->sensor_info.output_info.start_status = + ISP4SD_START_STATUS_STARTED; + isp_subdev->sensor_info.status = + ISP4SD_START_STATUS_STARTED; + } else { + dev_dbg(dev, "no send start, required %u, buf sent %u\n", + ISP4SD_MIN_BUF_CNT_BEF_START_STREAM, + isp_subdev->sensor_info.buf_sent_cnt); + } + } + + return 0; + +error_release_buf_node: + isp4if_dealloc_buffer_node(buf_node); + return ret; +} + +static int isp4sd_set_power(struct v4l2_subdev *sd, int on) +{ + struct isp4_subdev *isp_subdev = to_isp4_subdev(sd); + + guard(mutex)(&isp_subdev->ops_mutex); + if (on) + return isp4sd_pwron_and_init(isp_subdev); + else + return isp4sd_pwroff_and_deinit(isp_subdev); +} + +static const struct v4l2_subdev_core_ops isp4sd_core_ops = { + .s_power = isp4sd_set_power, +}; + +static const struct v4l2_subdev_video_ops isp4sd_video_ops = { + .s_stream = v4l2_subdev_s_stream_helper, +}; + +static int isp4sd_set_pad_format(struct v4l2_subdev *sd, + struct v4l2_subdev_state *sd_state, + struct v4l2_subdev_format *fmt) +{ + struct isp4sd_output_info *steam_info = + &(to_isp4_subdev(sd)->sensor_info.output_info); + struct v4l2_mbus_framefmt *format; + + format = 
v4l2_subdev_state_get_format(sd_state, fmt->pad); + + if (!format) { + dev_err(sd->dev, "fail to get state format\n"); + return -EINVAL; + } + + *format = fmt->format; + switch (format->code) { + case MEDIA_BUS_FMT_YUYV8_1_5X8: + steam_info->image_size = format->width * format->height * 3 / 2; + break; + case MEDIA_BUS_FMT_YUYV8_1X16: + steam_info->image_size = format->width * format->height * 2; + break; + default: + steam_info->image_size = 0; + break; + } + if (!steam_info->image_size) { + dev_err(sd->dev, "fail set pad format,code 0x%x,width %u, height %u\n", + format->code, format->width, format->height); + return -EINVAL; + } + + dev_dbg(sd->dev, "set pad format suc, code:%x w:%u h:%u size:%u\n", + format->code, format->width, format->height, steam_info->image_size); + + return 0; +} + +static int isp4sd_enable_streams(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state, u32 pad, + u64 streams_mask) +{ + struct isp4_subdev *isp_subdev = to_isp4_subdev(sd); + + return isp4sd_start_stream(isp_subdev, state, pad); +} + +static int isp4sd_disable_streams(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state, u32 pad, + u64 streams_mask) +{ + struct isp4_subdev *isp_subdev = to_isp4_subdev(sd); + + return isp4sd_stop_stream(isp_subdev, state, pad); +} + +static const struct v4l2_subdev_pad_ops isp4sd_pad_ops = { + .get_fmt = v4l2_subdev_get_fmt, + .set_fmt = isp4sd_set_pad_format, + .enable_streams = isp4sd_enable_streams, + .disable_streams = isp4sd_disable_streams, +}; + +static const struct v4l2_subdev_ops isp4sd_subdev_ops = { + .core = &isp4sd_core_ops, + .video = &isp4sd_video_ops, + .pad = &isp4sd_pad_ops, +}; + +static int isp4sd_sdev_link_validate(struct media_link *link) +{ + return 0; +} + +static const struct media_entity_operations isp4sd_sdev_ent_ops = { + .link_validate = isp4sd_sdev_link_validate, +}; + +static const struct isp4vid_ops isp4sd_isp4vid_ops = { + .send_buffer = isp4sd_ioc_send_img_buf, +}; + +int isp4sd_init(struct isp4_subdev *isp_subdev, struct v4l2_device *v4l2_dev, + int irq[ISP4SD_MAX_FW_RESP_STREAM_NUM]) +{ + struct isp4sd_sensor_info *sensor_info = &isp_subdev->sensor_info; + struct isp4_interface *ispif = &isp_subdev->ispif; + struct device *dev = v4l2_dev->dev; + int ret; + + isp_subdev->dev = dev; + v4l2_subdev_init(&isp_subdev->sdev, &isp4sd_subdev_ops); + isp_subdev->sdev.owner = THIS_MODULE; + isp_subdev->sdev.dev = dev; + snprintf(isp_subdev->sdev.name, sizeof(isp_subdev->sdev.name), "%s", + dev_name(dev)); + + isp_subdev->sdev.entity.name = isp4sd_entity_name; + isp_subdev->sdev.entity.function = MEDIA_ENT_F_PROC_VIDEO_ISP; + isp_subdev->sdev.entity.ops = &isp4sd_sdev_ent_ops; + isp_subdev->sdev_pad.flags = MEDIA_PAD_FL_SOURCE; + ret = media_entity_pads_init(&isp_subdev->sdev.entity, 1, + &isp_subdev->sdev_pad); + if (ret) { + dev_err(dev, "fail to init isp4 subdev entity pad %d\n", ret); + return ret; + } + + ret = v4l2_subdev_init_finalize(&isp_subdev->sdev); + if (ret < 0) { + dev_err(dev, "fail to init finalize isp4 subdev %d\n", + ret); + return ret; + } + + ret = v4l2_device_register_subdev(v4l2_dev, &isp_subdev->sdev); + if (ret) { + dev_err(dev, "fail to register isp4 subdev to V4L2 device %d\n", ret); + goto err_media_clean_up; + } + + isp4if_init(ispif, dev, isp_subdev->mmio); + + mutex_init(&isp_subdev->ops_mutex); + sensor_info->status = ISP4SD_START_STATUS_OFF; + + /* create ISP enable gpio control */ + isp_subdev->enable_gpio = devm_gpiod_get(isp_subdev->dev, + "enable_isp", + GPIOD_OUT_LOW); + if 
(IS_ERR(isp_subdev->enable_gpio)) {
+ ret = PTR_ERR(isp_subdev->enable_gpio);
+ dev_err(dev, "fail to get gpiod %d\n", ret);
+ goto err_subdev_unreg;
+ }
+
+ for (int i = 0; i < ISP4SD_MAX_FW_RESP_STREAM_NUM; i++)
+ isp_subdev->irq[i] = irq[i];
+
+ isp_subdev->host2fw_seq_num = 1;
+ ispif->status = ISP4IF_STATUS_PWR_OFF;
+
+ ret = isp4vid_dev_init(&isp_subdev->isp_vdev, &isp_subdev->sdev,
+ &isp4sd_isp4vid_ops);
+ if (ret)
+ goto err_subdev_unreg;
+
+ return 0;
+
+err_subdev_unreg:
+ v4l2_device_unregister_subdev(&isp_subdev->sdev);
+err_media_clean_up:
+ v4l2_subdev_cleanup(&isp_subdev->sdev);
+ media_entity_cleanup(&isp_subdev->sdev.entity);
+ return ret;
+}
+
+void isp4sd_deinit(struct isp4_subdev *isp_subdev)
+{
+ struct isp4_interface *ispif = &isp_subdev->ispif;
+
+ isp4vid_dev_deinit(&isp_subdev->isp_vdev);
+ v4l2_device_unregister_subdev(&isp_subdev->sdev);
+ media_entity_cleanup(&isp_subdev->sdev.entity);
+ isp4if_deinit(ispif);
+ isp4sd_module_enable(isp_subdev, false);
+
+ ispif->status = ISP4IF_STATUS_PWR_OFF;
+}
diff --git a/drivers/media/platform/amd/isp4/isp4_subdev.h b/drivers/media/platform/amd/isp4/isp4_subdev.h
new file mode 100644
index 000000000000..942245a6f356
--- /dev/null
+++ b/drivers/media/platform/amd/isp4/isp4_subdev.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef _ISP4_SUBDEV_H_
+#define _ISP4_SUBDEV_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "isp4_fw_cmd_resp.h"
+#include "isp4_hw_reg.h"
+#include "isp4_interface.h"
+#include "isp4_video.h"
+
+/*
+ * One is for the non-sensor-specific response, which is not used now;
+ * the other is for the sensor-specific response.
+ */
+#define ISP4SD_MAX_FW_RESP_STREAM_NUM 2
+
+/*
+ * cmd used to register a frame-done callback; the parameter is a
+ * struct isp4sd_register_framedone_cb_param *.
+ * When an image buffer is filled by the ISP, the ISP calls the registered
+ * callback. The callback prototype is isp4sd_framedone_cb; cb_ctx can be
+ * anything provided by the caller and is passed back as the first
+ * parameter of the callback function.
+ * Both cb_func and cb_ctx are provided by the caller; set cb_func to NULL
+ * to unregister the callback.
+ */
+
+/* used to indicate the ISP status */
+enum isp4sd_status {
+ ISP4SD_STATUS_PWR_OFF,
+ ISP4SD_STATUS_PWR_ON,
+ ISP4SD_STATUS_FW_RUNNING,
+ ISP4SD_STATUS_MAX
+};
+
+/* used to indicate the status of the sensor and output stream */
+enum isp4sd_start_status {
+ ISP4SD_START_STATUS_OFF,
+ ISP4SD_START_STATUS_STARTED,
+ ISP4SD_START_STATUS_START_FAIL,
+};
+
+struct isp4sd_img_buf_node {
+ struct list_head node;
+ struct isp4if_img_buf_info buf_info;
+};
+
+/* the ISP output after processing the Bayer raw input from the sensor */
+struct isp4sd_output_info {
+ enum isp4sd_start_status start_status;
+ u32 image_size;
+};
+
+/*
+ * This struct represents the sensor info, i.e. the input or source of the ISP:
+ * status is the sensor status,
+ * output_info is the ISP output info after the ISP processes the sensor input,
+ * start_stream_cmd_sent indicates whether CMD_ID_START_STREAM has been sent to the fw,
+ * buf_sent_cnt is buffer count app has sent to receive the images + */ +struct isp4sd_sensor_info { + struct isp4sd_output_info output_info; + enum isp4sd_start_status status; + bool start_stream_cmd_sent; + u32 buf_sent_cnt; +}; + +/* + * Thread created by driver to receive fw response + * thread will be wakeup by fw to driver response interrupt + */ +struct isp4sd_thread_handler { + struct task_struct *thread; + wait_queue_head_t waitq; + bool resp_ready; +}; + +struct isp4_subdev_thread_param { + u32 idx; + struct isp4_subdev *isp_subdev; +}; + +struct isp4_subdev { + struct v4l2_subdev sdev; + struct isp4_interface ispif; + struct isp4vid_dev isp_vdev; + + struct media_pad sdev_pad; + + enum isp4sd_status isp_status; + struct mutex ops_mutex; /* ops_mutex */ + + /* Used to store fw cmds sent to FW whose response driver needs to wait for */ + struct isp4sd_thread_handler fw_resp_thread[ISP4SD_MAX_FW_RESP_STREAM_NUM]; + + u32 host2fw_seq_num; + + struct isp4sd_sensor_info sensor_info; + + /* gpio descriptor */ + struct gpio_desc *enable_gpio; + struct device *dev; + void __iomem *mmio; + struct isp4_subdev_thread_param isp_resp_para[ISP4SD_MAX_FW_RESP_STREAM_NUM]; + int irq[ISP4SD_MAX_FW_RESP_STREAM_NUM]; + bool irq_enabled; + /* spin lock to access ISP_SYS_INT0_EN exclusively */ + spinlock_t irq_lock; +#ifdef CONFIG_DEBUG_FS + bool enable_fw_log; + struct dentry *debugfs_dir; + char *fw_log_output; +#endif +}; + +int isp4sd_init(struct isp4_subdev *isp_subdev, struct v4l2_device *v4l2_dev, + int irq[ISP4SD_MAX_FW_RESP_STREAM_NUM]); +void isp4sd_deinit(struct isp4_subdev *isp_subdev); + +#endif /* _ISP4_SUBDEV_H_ */ diff --git a/drivers/media/platform/amd/isp4/isp4_video.c b/drivers/media/platform/amd/isp4/isp4_video.c new file mode 100644 index 000000000000..5006e5021155 --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_video.c @@ -0,0 +1,1165 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. 
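+ *
+ * V4L2 video capture node for the ISP4 preview stream: vb2 memory ops
+ * backed by vmalloc memory that is also mapped for the ISP, dma-buf
+ * export/import and the capture ioctl handlers.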
+ */ + +#include +#include +#include + +#include "isp4_interface.h" +#include "isp4_subdev.h" +#include "isp4_video.h" + +#define ISP4VID_ISP_DRV_NAME "amd_isp_capture" +#define ISP4VID_MAX_PREVIEW_FPS 30 +#define ISP4VID_DEFAULT_FMT isp4vid_formats[0] + +#define ISP4VID_PAD_VIDEO_OUTPUT 0 + +/* timeperframe default */ +#define ISP4VID_ISP_TPF_DEFAULT isp4vid_tpfs[0] + +/* amdisp buffer for vb2 operations */ +struct isp4vid_vb2_buf { + struct device *dev; + void *vaddr; + unsigned long size; + refcount_t refcount; + struct dma_buf *dbuf; + void *bo; + u64 gpu_addr; + struct vb2_vmarea_handler handler; +}; + +static void isp4vid_vb2_put(void *buf_priv); + +static const char *const isp4vid_video_dev_name = "Preview"; + +/* Sizes must be in increasing order */ +static const struct v4l2_frmsize_discrete isp4vid_frmsize[] = { + {640, 360}, + {640, 480}, + {1280, 720}, + {1280, 960}, + {1920, 1080}, + {1920, 1440}, + {2560, 1440}, + {2880, 1620}, + {2880, 1624}, + {2888, 1808}, +}; + +static const u32 isp4vid_formats[] = { + V4L2_PIX_FMT_NV12, + V4L2_PIX_FMT_YUYV +}; + +/* timeperframe list */ +static const struct v4l2_fract isp4vid_tpfs[] = { + { 1, ISP4VID_MAX_PREVIEW_FPS } +}; + +void isp4vid_handle_frame_done(struct isp4vid_dev *isp_vdev, + const struct isp4if_img_buf_info *img_buf) +{ + struct isp4vid_capture_buffer *isp4vid_buf; + void *vbuf; + + scoped_guard(mutex, &isp_vdev->buf_list_lock) { + /* Get the first entry of the list */ + isp4vid_buf = list_first_entry_or_null(&isp_vdev->buf_list, typeof(*isp4vid_buf), + list); + if (!isp4vid_buf) + return; + + vbuf = vb2_plane_vaddr(&isp4vid_buf->vb2.vb2_buf, 0); + + if (vbuf != img_buf->planes[0].sys_addr) { + dev_err(isp_vdev->dev, "Invalid vbuf"); + return; + } + + /* Remove this entry from the list */ + list_del(&isp4vid_buf->list); + } + + /* Fill the buffer */ + isp4vid_buf->vb2.vb2_buf.timestamp = ktime_get_ns(); + isp4vid_buf->vb2.sequence = isp_vdev->sequence++; + isp4vid_buf->vb2.field = V4L2_FIELD_ANY; + + /* at most 2 planes */ + vb2_set_plane_payload(&isp4vid_buf->vb2.vb2_buf, + 0, isp_vdev->format.sizeimage); + + vb2_buffer_done(&isp4vid_buf->vb2.vb2_buf, VB2_BUF_STATE_DONE); + + dev_dbg(isp_vdev->dev, "call vb2_buffer_done(size=%u)\n", + isp_vdev->format.sizeimage); +} + +static unsigned int isp4vid_vb2_num_users(void *buf_priv) +{ + struct isp4vid_vb2_buf *buf = buf_priv; + + return refcount_read(&buf->refcount); +} + +static int isp4vid_vb2_mmap(void *buf_priv, struct vm_area_struct *vma) +{ + struct isp4vid_vb2_buf *buf = buf_priv; + int ret; + + if (!buf) { + pr_err("fail no memory to map\n"); + return -EINVAL; + } + + ret = remap_vmalloc_range(vma, buf->vaddr, 0); + if (ret) { + dev_err(buf->dev, "fail remap vmalloc mem, %d\n", ret); + return ret; + } + + /* + * Make sure that vm_areas for 2 buffers won't be merged together + */ + vm_flags_set(vma, VM_DONTEXPAND); + + /* + * Use common vm_area operations to track buffer refcount. 
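+ * vb2_common_vm_ops bumps buf->refcount (via buf->handler) on every
+ * vma open and calls buf->handler.put() on the final close, so the
+ * buffer cannot be freed while userspace still maps it.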
+ */ + vma->vm_private_data = &buf->handler; + vma->vm_ops = &vb2_common_vm_ops; + + vma->vm_ops->open(vma); + + dev_dbg(buf->dev, "mmap isp user bo 0x%llx size %ld refcount %d\n", + buf->gpu_addr, buf->size, refcount_read(&buf->refcount)); + + return 0; +} + +static void *isp4vid_vb2_vaddr(struct vb2_buffer *vb, void *buf_priv) +{ + struct isp4vid_vb2_buf *buf = buf_priv; + + if (!buf->vaddr) { + dev_err(buf->dev, + "fail for buf vaddr is null\n"); + return NULL; + } + + return buf->vaddr; +} + +static void isp4vid_vb2_detach_dmabuf(void *mem_priv) +{ + struct isp4vid_vb2_buf *buf = mem_priv; + + dev_dbg(buf->dev, "detach dmabuf of isp user bo 0x%llx size %ld", + buf->gpu_addr, buf->size); + + kfree(buf); +} + +static void *isp4vid_vb2_attach_dmabuf(struct vb2_buffer *vb, struct device *dev, + struct dma_buf *dbuf, + unsigned long size) +{ + struct isp4vid_vb2_buf *dbg_buf = dbuf->priv; + struct isp4vid_vb2_buf *buf; + + if (dbuf->size < size) { + dev_err(dev, "Invalid dmabuf size %zu %lu", dbuf->size, size); + return ERR_PTR(-EFAULT); + } + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + buf->dev = dev; + buf->dbuf = dbuf; + buf->size = size; + + dev_dbg(dev, "attach dmabuf of isp user bo 0x%llx size %ld", + dbg_buf->gpu_addr, dbg_buf->size); + + return buf; +} + +static void isp4vid_vb2_unmap_dmabuf(void *mem_priv) +{ + struct isp4vid_vb2_buf *buf = mem_priv; + struct iosys_map map = IOSYS_MAP_INIT_VADDR(buf->vaddr); + + dev_dbg(buf->dev, "unmap dmabuf of isp user bo 0x%llx size %ld", + buf->gpu_addr, buf->size); + + dma_buf_vunmap_unlocked(buf->dbuf, &map); + buf->vaddr = NULL; +} + +static int isp4vid_vb2_map_dmabuf(void *mem_priv) +{ + struct isp4vid_vb2_buf *buf = mem_priv, *mmap_buf; + struct iosys_map map; + int ret; + + ret = dma_buf_vmap_unlocked(buf->dbuf, &map); + if (ret) { + dev_err(buf->dev, "vmap_unlocked fail"); + return -EFAULT; + } + + buf->vaddr = map.vaddr; + mmap_buf = buf->dbuf->priv; + buf->gpu_addr = mmap_buf->gpu_addr; + + dev_dbg(buf->dev, "map dmabuf of isp user bo 0x%llx size %ld", + buf->gpu_addr, buf->size); + + return 0; +} + +#ifdef CONFIG_HAS_DMA +struct isp4vid_vb2_amdgpu_attachment { + struct sg_table sgt; + enum dma_data_direction dma_dir; +}; + +static int isp4vid_dmabuf_ops_attach(struct dma_buf *dbuf, + struct dma_buf_attachment *dbuf_attach) +{ + struct isp4vid_vb2_buf *buf = dbuf->priv; + int num_pages = PAGE_ALIGN(buf->size) / PAGE_SIZE; + struct isp4vid_vb2_amdgpu_attachment *attach; + void *vaddr = buf->vaddr; + struct scatterlist *sg; + struct sg_table *sgt; + int ret; + int i; + + attach = kzalloc(sizeof(*attach), GFP_KERNEL); + if (!attach) + return -ENOMEM; + + sgt = &attach->sgt; + ret = sg_alloc_table(sgt, num_pages, GFP_KERNEL); + if (ret) { + kfree(attach); + return ret; + } + + for_each_sgtable_sg(sgt, sg, i) { + struct page *page = vmalloc_to_page(vaddr); + + if (!page) { + sg_free_table(sgt); + kfree(attach); + return -ENOMEM; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + vaddr = ((char *)vaddr) + PAGE_SIZE; + } + + attach->dma_dir = DMA_NONE; + dbuf_attach->priv = attach; + + return 0; +} + +static void isp4vid_dmabuf_ops_detach(struct dma_buf *dbuf, + struct dma_buf_attachment *dbuf_attach) +{ + struct isp4vid_vb2_amdgpu_attachment *attach = dbuf_attach->priv; + struct sg_table *sgt; + + if (!attach) { + pr_err("fail invalid attach handler\n"); + return; + } + + sgt = &attach->sgt; + + /* release the scatterlist cache */ + if (attach->dma_dir != DMA_NONE) + dma_unmap_sgtable(dbuf_attach->dev, 
sgt, attach->dma_dir, 0); + + sg_free_table(sgt); + kfree(attach); + dbuf_attach->priv = NULL; +} + +static struct sg_table *isp4vid_dmabuf_ops_map(struct dma_buf_attachment *dbuf_attach, + enum dma_data_direction dma_dir) +{ + struct isp4vid_vb2_amdgpu_attachment *attach = dbuf_attach->priv; + struct sg_table *sgt; + + sgt = &attach->sgt; + /* return previously mapped sg table */ + if (attach->dma_dir == dma_dir) + return sgt; + + /* release any previous cache */ + if (attach->dma_dir != DMA_NONE) { + dma_unmap_sgtable(dbuf_attach->dev, sgt, attach->dma_dir, 0); + attach->dma_dir = DMA_NONE; + } + + /* mapping to the client with new direction */ + if (dma_map_sgtable(dbuf_attach->dev, sgt, dma_dir, 0)) { + dev_err(dbuf_attach->dev, "fail to map scatterlist"); + return ERR_PTR(-EIO); + } + + attach->dma_dir = dma_dir; + + return sgt; +} + +static void isp4vid_dmabuf_ops_unmap(struct dma_buf_attachment *dbuf_attach, + struct sg_table *sgt, + enum dma_data_direction dma_dir) +{ + /* nothing to be done here */ +} + +static int isp4vid_dmabuf_ops_vmap(struct dma_buf *dbuf, + struct iosys_map *map) +{ + struct isp4vid_vb2_buf *buf = dbuf->priv; + + iosys_map_set_vaddr(map, buf->vaddr); + + return 0; +} + +static void isp4vid_dmabuf_ops_release(struct dma_buf *dbuf) +{ + struct isp4vid_vb2_buf *buf = dbuf->priv; + + /* drop reference obtained in isp4vid_vb2_get_dmabuf */ + if (dbuf != buf->dbuf) + isp4vid_vb2_put(buf); + else + kfree(buf); +} + +static int isp4vid_dmabuf_ops_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma) +{ + return isp4vid_vb2_mmap(dbuf->priv, vma); +} + +static const struct dma_buf_ops isp4vid_dmabuf_ops = { + .attach = isp4vid_dmabuf_ops_attach, + .detach = isp4vid_dmabuf_ops_detach, + .map_dma_buf = isp4vid_dmabuf_ops_map, + .unmap_dma_buf = isp4vid_dmabuf_ops_unmap, + .vmap = isp4vid_dmabuf_ops_vmap, + .mmap = isp4vid_dmabuf_ops_mmap, + .release = isp4vid_dmabuf_ops_release, +}; + +static struct dma_buf *isp4vid_get_dmabuf(struct isp4vid_vb2_buf *buf, unsigned long flags) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dbuf; + + if (WARN_ON(!buf->vaddr)) + return NULL; + + exp_info.ops = &isp4vid_dmabuf_ops; + exp_info.size = buf->size; + exp_info.flags = flags; + exp_info.priv = buf; + + dbuf = dma_buf_export(&exp_info); + if (IS_ERR(dbuf)) + return NULL; + + return dbuf; +} + +static struct dma_buf *isp4vid_vb2_get_dmabuf(struct vb2_buffer *vb, void *buf_priv, + unsigned long flags) +{ + struct isp4vid_vb2_buf *buf = buf_priv; + struct dma_buf *dbuf; + + dbuf = isp4vid_get_dmabuf(buf, flags); + if (!dbuf) { + dev_err(buf->dev, "fail to get isp dma buffer\n"); + return NULL; + } + + refcount_inc(&buf->refcount); + + dev_dbg(buf->dev, "buf exported, refcount %d\n", + refcount_read(&buf->refcount)); + + return dbuf; +} +#endif /* CONFIG_HAS_DMA */ + +static void isp4vid_vb2_put(void *buf_priv) +{ + struct isp4vid_vb2_buf *buf = buf_priv; + + dev_dbg(buf->dev, + "release isp user bo 0x%llx size %ld refcount %d", + buf->gpu_addr, buf->size, + refcount_read(&buf->refcount)); + + if (refcount_dec_and_test(&buf->refcount)) { + isp_user_buffer_free(buf->bo); + vfree(buf->vaddr); + /* + * Putting the implicit dmabuf frees `buf`. Freeing `buf` must + * be done from the dmabuf release callback since dma_buf_put() + * isn't always synchronous; it's just an fput(), which may be + * deferred. Since the dmabuf release callback needs to access + * `buf`, this means `buf` cannot be freed until then. 
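+ * (fput() may defer the final release to task work, so the dmabuf
+ * ->release callback can run after dma_buf_put() returns.)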
+ */ + dma_buf_put(buf->dbuf); + } +} + +static void *isp4vid_vb2_alloc(struct vb2_buffer *vb, struct device *dev, unsigned long size) +{ + struct isp4vid_vb2_buf *buf; + u64 gpu_addr; + void *bo; + int ret; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL | vb->vb2_queue->gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + + buf->dev = dev; + buf->size = size; + buf->vaddr = vmalloc_user(buf->size); + if (!buf->vaddr) { + dev_err(dev, "fail to vmalloc buffer\n"); + goto free_buf; + } + + buf->handler.refcount = &buf->refcount; + buf->handler.put = isp4vid_vb2_put; + buf->handler.arg = buf; + + /* get implicit dmabuf */ + buf->dbuf = isp4vid_get_dmabuf(buf, 0); + if (!buf->dbuf) { + dev_err(dev, "fail to get implicit dmabuf\n"); + goto free_user_vmem; + } + + /* create isp user BO and obtain gpu_addr */ + ret = isp_user_buffer_alloc(dev, buf->dbuf, &bo, &gpu_addr); + if (ret) { + dev_err(dev, "fail to create isp user BO\n"); + goto put_dmabuf; + } + + buf->bo = bo; + buf->gpu_addr = gpu_addr; + + refcount_set(&buf->refcount, 1); + + dev_dbg(dev, "allocated isp user bo 0x%llx size %ld refcount %d\n", + buf->gpu_addr, buf->size, refcount_read(&buf->refcount)); + + return buf; + +put_dmabuf: + dma_buf_put(buf->dbuf); +free_user_vmem: + vfree(buf->vaddr); +free_buf: + ret = buf->vaddr ? -EINVAL : -ENOMEM; + kfree(buf); + return ERR_PTR(ret); +} + +static const struct vb2_mem_ops isp4vid_vb2_memops = { + .alloc = isp4vid_vb2_alloc, + .put = isp4vid_vb2_put, +#ifdef CONFIG_HAS_DMA + .get_dmabuf = isp4vid_vb2_get_dmabuf, +#endif + .map_dmabuf = isp4vid_vb2_map_dmabuf, + .unmap_dmabuf = isp4vid_vb2_unmap_dmabuf, + .attach_dmabuf = isp4vid_vb2_attach_dmabuf, + .detach_dmabuf = isp4vid_vb2_detach_dmabuf, + .vaddr = isp4vid_vb2_vaddr, + .mmap = isp4vid_vb2_mmap, + .num_users = isp4vid_vb2_num_users, +}; + +static const struct v4l2_pix_format isp4vid_fmt_default = { + .width = 1920, + .height = 1080, + .pixelformat = ISP4VID_DEFAULT_FMT, + .field = V4L2_FIELD_NONE, + .colorspace = V4L2_COLORSPACE_SRGB, +}; + +static void isp4vid_capture_return_all_buffers(struct isp4vid_dev *isp_vdev, + enum vb2_buffer_state state) +{ + struct isp4vid_capture_buffer *vbuf, *node; + + scoped_guard(mutex, &isp_vdev->buf_list_lock) { + list_for_each_entry_safe(vbuf, node, &isp_vdev->buf_list, list) + vb2_buffer_done(&vbuf->vb2.vb2_buf, state); + INIT_LIST_HEAD(&isp_vdev->buf_list); + } + + dev_dbg(isp_vdev->dev, "call vb2_buffer_done(%d)\n", state); +} + +static int isp4vid_vdev_link_validate(struct media_link *link) +{ + return 0; +} + +static const struct media_entity_operations isp4vid_vdev_ent_ops = { + .link_validate = isp4vid_vdev_link_validate, +}; + +static const struct v4l2_file_operations isp4vid_vdev_fops = { + .owner = THIS_MODULE, + .open = v4l2_fh_open, + .release = vb2_fop_release, + .read = vb2_fop_read, + .poll = vb2_fop_poll, + .unlocked_ioctl = video_ioctl2, + .mmap = vb2_fop_mmap, +}; + +static int isp4vid_ioctl_querycap(struct file *file, void *fh, struct v4l2_capability *cap) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + + strscpy(cap->driver, ISP4VID_ISP_DRV_NAME, sizeof(cap->driver)); + snprintf(cap->card, sizeof(cap->card), "%s", ISP4VID_ISP_DRV_NAME); + cap->capabilities |= V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_CAPTURE; + + dev_dbg(isp_vdev->dev, "%s|capabilities=0x%X", isp_vdev->vdev.name, cap->capabilities); + + return 0; +} + +static int isp4vid_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + + f->fmt.pix = 
isp_vdev->format; + + return 0; +} + +static int isp4vid_fill_buffer_size(struct v4l2_pix_format *fmt) +{ + int ret = 0; + + switch (fmt->pixelformat) { + case V4L2_PIX_FMT_NV12: + fmt->bytesperline = fmt->width; + fmt->sizeimage = fmt->bytesperline * fmt->height * 3 / 2; + break; + case V4L2_PIX_FMT_YUYV: + fmt->bytesperline = fmt->width * 2; + fmt->sizeimage = fmt->bytesperline * fmt->height; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int isp4vid_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + struct v4l2_pix_format *format = &f->fmt.pix; + const struct v4l2_frmsize_discrete *fsz; + int i; + + if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + /* + * Check if the hardware supports the requested format, use the default + * format otherwise. + */ + for (i = 0; i < ARRAY_SIZE(isp4vid_formats); i++) + if (isp4vid_formats[i] == format->pixelformat) + break; + + if (i == ARRAY_SIZE(isp4vid_formats)) + format->pixelformat = ISP4VID_DEFAULT_FMT; + + switch (format->pixelformat) { + case V4L2_PIX_FMT_NV12: + case V4L2_PIX_FMT_YUYV: + fsz = v4l2_find_nearest_size(isp4vid_frmsize, ARRAY_SIZE(isp4vid_frmsize), + width, height, format->width, format->height); + format->width = fsz->width; + format->height = fsz->height; + break; + default: + dev_err(isp_vdev->dev, "%s|unsupported fmt=%u", isp_vdev->vdev.name, + format->pixelformat); + return -EINVAL; + } + + /* There is no need to check the return value, as failure will never happen here */ + isp4vid_fill_buffer_size(format); + + if (format->field == V4L2_FIELD_ANY) + format->field = isp4vid_fmt_default.field; + + if (format->colorspace == V4L2_COLORSPACE_DEFAULT) + format->colorspace = isp4vid_fmt_default.colorspace; + + return 0; +} + +static int isp4vid_set_fmt_2_isp(struct v4l2_subdev *sdev, struct v4l2_pix_format *pix_fmt) +{ + struct v4l2_subdev_format fmt = {}; + + switch (pix_fmt->pixelformat) { + case V4L2_PIX_FMT_NV12: + fmt.format.code = MEDIA_BUS_FMT_YUYV8_1_5X8; + break; + case V4L2_PIX_FMT_YUYV: + fmt.format.code = MEDIA_BUS_FMT_YUYV8_1X16; + break; + default: + return -EINVAL; + } + fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE; + fmt.pad = ISP4VID_PAD_VIDEO_OUTPUT; + fmt.format.width = pix_fmt->width; + fmt.format.height = pix_fmt->height; + return v4l2_subdev_call(sdev, pad, set_fmt, NULL, &fmt); +} + +static int isp4vid_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + int ret; + + /* Do not change the format while stream is on */ + if (vb2_is_busy(&isp_vdev->vbq)) + return -EBUSY; + + if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + ret = isp4vid_try_fmt_vid_cap(file, priv, f); + if (ret) + return ret; + + dev_dbg(isp_vdev->dev, "%s|width height:%ux%u->%ux%u", + isp_vdev->vdev.name, + isp_vdev->format.width, isp_vdev->format.height, + f->fmt.pix.width, f->fmt.pix.height); + dev_dbg(isp_vdev->dev, "%s|pixelformat:0x%x-0x%x", + isp_vdev->vdev.name, isp_vdev->format.pixelformat, + f->fmt.pix.pixelformat); + dev_dbg(isp_vdev->dev, "%s|bytesperline:%u->%u", + isp_vdev->vdev.name, isp_vdev->format.bytesperline, + f->fmt.pix.bytesperline); + dev_dbg(isp_vdev->dev, "%s|sizeimage:%u->%u", + isp_vdev->vdev.name, isp_vdev->format.sizeimage, + f->fmt.pix.sizeimage); + + isp_vdev->format = f->fmt.pix; + ret = isp4vid_set_fmt_2_isp(isp_vdev->isp_sdev, &isp_vdev->format); + + return ret; +} + +static int 
isp4vid_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + + switch (f->index) { + case 0: + f->pixelformat = V4L2_PIX_FMT_NV12; + break; + case 1: + f->pixelformat = V4L2_PIX_FMT_YUYV; + break; + default: + return -EINVAL; + } + + dev_dbg(isp_vdev->dev, "%s|index=%d, pixelformat=0x%X", + isp_vdev->vdev.name, f->index, f->pixelformat); + + return 0; +} + +static int isp4vid_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(isp4vid_formats); i++) { + if (isp4vid_formats[i] == fsize->pixel_format) + break; + } + if (i == ARRAY_SIZE(isp4vid_formats)) + return -EINVAL; + + if (fsize->index < ARRAY_SIZE(isp4vid_frmsize)) { + fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; + fsize->discrete = isp4vid_frmsize[fsize->index]; + dev_dbg(isp_vdev->dev, "%s|size[%d]=%dx%d", + isp_vdev->vdev.name, fsize->index, + fsize->discrete.width, fsize->discrete.height); + } else { + return -EINVAL; + } + + return 0; +} + +static int isp4vid_ioctl_enum_frameintervals(struct file *file, void *priv, + struct v4l2_frmivalenum *fival) +{ + struct isp4vid_dev *isp_vdev = video_drvdata(file); + int i; + + if (fival->index >= ARRAY_SIZE(isp4vid_tpfs)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(isp4vid_formats); i++) + if (isp4vid_formats[i] == fival->pixel_format) + break; + + if (i == ARRAY_SIZE(isp4vid_formats)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(isp4vid_frmsize); i++) + if (isp4vid_frmsize[i].width == fival->width && + isp4vid_frmsize[i].height == fival->height) + break; + + if (i == ARRAY_SIZE(isp4vid_frmsize)) + return -EINVAL; + + fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; + fival->discrete = isp4vid_tpfs[fival->index]; + v4l2_simplify_fraction(&fival->discrete.numerator, + &fival->discrete.denominator, 8, 333); + + dev_dbg(isp_vdev->dev, "%s|interval[%d]=%d/%d", + isp_vdev->vdev.name, fival->index, + fival->discrete.numerator, + fival->discrete.denominator); + + return 0; +} + +static int isp4vid_ioctl_g_param(struct file *file, void *priv, struct v4l2_streamparm *param) +{ + struct v4l2_captureparm *capture = ¶m->parm.capture; + struct isp4vid_dev *isp_vdev = video_drvdata(file); + + if (param->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + capture->capability = V4L2_CAP_TIMEPERFRAME; + capture->timeperframe = isp_vdev->timeperframe; + capture->readbuffers = 0; + + dev_dbg(isp_vdev->dev, "%s|timeperframe=%d/%d", isp_vdev->vdev.name, + capture->timeperframe.numerator, + capture->timeperframe.denominator); + + return 0; +} + +static const struct v4l2_ioctl_ops isp4vid_vdev_ioctl_ops = { + /* VIDIOC_QUERYCAP handler */ + .vidioc_querycap = isp4vid_ioctl_querycap, + + /* VIDIOC_ENUM_FMT handlers */ + .vidioc_enum_fmt_vid_cap = isp4vid_enum_fmt_vid_cap, + + /* VIDIOC_G_FMT handlers */ + .vidioc_g_fmt_vid_cap = isp4vid_g_fmt_vid_cap, + + /* VIDIOC_S_FMT handlers */ + .vidioc_s_fmt_vid_cap = isp4vid_s_fmt_vid_cap, + + /* VIDIOC_TRY_FMT handlers */ + .vidioc_try_fmt_vid_cap = isp4vid_try_fmt_vid_cap, + + /* Buffer handlers */ + .vidioc_reqbufs = vb2_ioctl_reqbufs, + .vidioc_querybuf = vb2_ioctl_querybuf, + .vidioc_qbuf = vb2_ioctl_qbuf, + .vidioc_expbuf = vb2_ioctl_expbuf, + .vidioc_dqbuf = vb2_ioctl_dqbuf, + .vidioc_create_bufs = vb2_ioctl_create_bufs, + .vidioc_prepare_buf = vb2_ioctl_prepare_buf, + + /* Stream on/off */ + .vidioc_streamon = vb2_ioctl_streamon, + .vidioc_streamoff = 
vb2_ioctl_streamoff, + + /* Stream type-dependent parameter ioctls */ + .vidioc_g_parm = isp4vid_ioctl_g_param, + .vidioc_s_parm = isp4vid_ioctl_g_param, + + .vidioc_enum_framesizes = isp4vid_enum_framesizes, + .vidioc_enum_frameintervals = isp4vid_ioctl_enum_frameintervals, + +}; + +static unsigned int isp4vid_get_image_size(struct v4l2_pix_format *fmt) +{ + switch (fmt->pixelformat) { + case V4L2_PIX_FMT_NV12: + return fmt->width * fmt->height * 3 / 2; + case V4L2_PIX_FMT_YUYV: + return fmt->width * fmt->height * 2; + default: + return 0; + } +} + +static int isp4vid_qops_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, + unsigned int *nplanes, unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct isp4vid_dev *isp_vdev = vb2_get_drv_priv(vq); + unsigned int q_num_bufs = vb2_get_num_buffers(vq); + + if (*nplanes > 1) { + dev_err(isp_vdev->dev, + "fail to setup queue, no mplane supported %u\n", + *nplanes); + return -EINVAL; + } + + if (*nplanes == 1) { + unsigned int size; + + size = isp4vid_get_image_size(&isp_vdev->format); + if (sizes[0] < size) { + dev_err(isp_vdev->dev, + "fail for small plane size %u, %u expected\n", + sizes[0], size); + return -EINVAL; + } + } + + if (q_num_bufs + *nbuffers < ISP4IF_MAX_STREAM_BUF_COUNT) + *nbuffers = ISP4IF_MAX_STREAM_BUF_COUNT - q_num_bufs; + + switch (isp_vdev->format.pixelformat) { + case V4L2_PIX_FMT_NV12: + case V4L2_PIX_FMT_YUYV: { + *nplanes = 1; + sizes[0] = max(sizes[0], isp_vdev->format.sizeimage); + isp_vdev->format.sizeimage = sizes[0]; + } + break; + default: + dev_err(isp_vdev->dev, "%s|unsupported fmt=%u\n", + isp_vdev->vdev.name, isp_vdev->format.pixelformat); + return -EINVAL; + } + + dev_dbg(isp_vdev->dev, "%s|*nbuffers=%u *nplanes=%u sizes[0]=%u\n", + isp_vdev->vdev.name, + *nbuffers, *nplanes, sizes[0]); + + return 0; +} + +static void isp4vid_qops_buffer_queue(struct vb2_buffer *vb) +{ + struct isp4vid_capture_buffer *buf = + container_of(vb, struct isp4vid_capture_buffer, vb2.vb2_buf); + struct isp4vid_dev *isp_vdev = vb2_get_drv_priv(vb->vb2_queue); + + struct isp4vid_vb2_buf *priv_buf = vb->planes[0].mem_priv; + struct isp4if_img_buf_info *img_buf = &buf->img_buf; + + dev_dbg(isp_vdev->dev, "%s|index=%u", isp_vdev->vdev.name, vb->index); + + dev_dbg(isp_vdev->dev, "queue isp user bo 0x%llx size=%lu", + priv_buf->gpu_addr, + priv_buf->size); + + switch (isp_vdev->format.pixelformat) { + case V4L2_PIX_FMT_NV12: { + u32 y_size = isp_vdev->format.sizeimage / 3 * 2; + u32 uv_size = isp_vdev->format.sizeimage / 3; + + img_buf->planes[0].len = y_size; + img_buf->planes[0].sys_addr = priv_buf->vaddr; + img_buf->planes[0].mc_addr = priv_buf->gpu_addr; + + dev_dbg(isp_vdev->dev, "img_buf[0]: mc=0x%llx size=%u", + img_buf->planes[0].mc_addr, + img_buf->planes[0].len); + + img_buf->planes[1].len = uv_size; + img_buf->planes[1].sys_addr = priv_buf->vaddr + y_size; + img_buf->planes[1].mc_addr = priv_buf->gpu_addr + y_size; + + dev_dbg(isp_vdev->dev, "img_buf[1]: mc=0x%llx size=%u", + img_buf->planes[1].mc_addr, + img_buf->planes[1].len); + + img_buf->planes[2].len = 0; + } + break; + case V4L2_PIX_FMT_YUYV: { + img_buf->planes[0].len = isp_vdev->format.sizeimage; + img_buf->planes[0].sys_addr = priv_buf->vaddr; + img_buf->planes[0].mc_addr = priv_buf->gpu_addr; + + dev_dbg(isp_vdev->dev, "img_buf[0]: mc=0x%llx size=%u", + img_buf->planes[0].mc_addr, + img_buf->planes[0].len); + + img_buf->planes[1].len = 0; + img_buf->planes[2].len = 0; + } + break; + default: + dev_err(isp_vdev->dev, "%s|unsupported fmt=%u", + 
isp_vdev->vdev.name, isp_vdev->format.pixelformat); + return; + } + + if (isp_vdev->stream_started) + isp_vdev->ops->send_buffer(isp_vdev->isp_sdev, img_buf); + + scoped_guard(mutex, &isp_vdev->buf_list_lock) + list_add_tail(&buf->list, &isp_vdev->buf_list); +} + +static int isp4vid_qops_start_streaming(struct vb2_queue *vq, unsigned int count) +{ + struct isp4vid_dev *isp_vdev = vb2_get_drv_priv(vq); + struct isp4vid_capture_buffer *isp4vid_buf; + struct media_entity *entity; + struct v4l2_subdev *subdev; + struct media_pad *pad; + int ret = 0; + + isp_vdev->sequence = 0; + ret = v4l2_pipeline_pm_get(&isp_vdev->vdev.entity); + if (ret) { + dev_err(isp_vdev->dev, "power up isp fail %d\n", ret); + goto release_buffers; + } + + entity = &isp_vdev->vdev.entity; + while (1) { + pad = &entity->pads[0]; + if (!(pad->flags & MEDIA_PAD_FL_SINK)) + break; + + pad = media_pad_remote_pad_first(pad); + if (!pad || !is_media_entity_v4l2_subdev(pad->entity)) + break; + + entity = pad->entity; + subdev = media_entity_to_v4l2_subdev(entity); + + ret = v4l2_subdev_call(subdev, video, s_stream, 1); + if (ret < 0 && ret != -ENOIOCTLCMD) { + dev_dbg(isp_vdev->dev, "fail start streaming: %s %d\n", + subdev->name, ret); + goto release_buffers; + } + } + + list_for_each_entry(isp4vid_buf, &isp_vdev->buf_list, list) + isp_vdev->ops->send_buffer(isp_vdev->isp_sdev, &isp4vid_buf->img_buf); + + /* Start the media pipeline */ + ret = video_device_pipeline_start(&isp_vdev->vdev, &isp_vdev->pipe); + if (ret) { + dev_err(isp_vdev->dev, "video_device_pipeline_start fail:%d", + ret); + goto release_buffers; + } + + isp_vdev->stream_started = true; + + return 0; + +release_buffers: + isp4vid_capture_return_all_buffers(isp_vdev, VB2_BUF_STATE_QUEUED); + return ret; +} + +static void isp4vid_qops_stop_streaming(struct vb2_queue *vq) +{ + struct isp4vid_dev *isp_vdev = vb2_get_drv_priv(vq); + struct v4l2_subdev *subdev; + struct media_entity *entity; + struct media_pad *pad; + int ret; + + entity = &isp_vdev->vdev.entity; + while (1) { + pad = &entity->pads[0]; + if (!(pad->flags & MEDIA_PAD_FL_SINK)) + break; + + pad = media_pad_remote_pad_first(pad); + if (!pad || !is_media_entity_v4l2_subdev(pad->entity)) + break; + + entity = pad->entity; + subdev = media_entity_to_v4l2_subdev(entity); + + ret = v4l2_subdev_call(subdev, video, s_stream, 0); + + if (ret < 0 && ret != -ENOIOCTLCMD) + dev_dbg(isp_vdev->dev, "fail stop streaming: %s %d\n", + subdev->name, ret); + } + + isp_vdev->stream_started = false; + v4l2_pipeline_pm_put(&isp_vdev->vdev.entity); + + /* Stop the media pipeline */ + video_device_pipeline_stop(&isp_vdev->vdev); + + /* Release all active buffers */ + isp4vid_capture_return_all_buffers(isp_vdev, VB2_BUF_STATE_ERROR); +} + +static const struct vb2_ops isp4vid_qops = { + .queue_setup = isp4vid_qops_queue_setup, + .buf_queue = isp4vid_qops_buffer_queue, + .start_streaming = isp4vid_qops_start_streaming, + .stop_streaming = isp4vid_qops_stop_streaming, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, +}; + +int isp4vid_dev_init(struct isp4vid_dev *isp_vdev, struct v4l2_subdev *isp_sdev, + const struct isp4vid_ops *ops) +{ + const char *vdev_name = isp4vid_video_dev_name; + struct v4l2_device *v4l2_dev; + struct video_device *vdev; + struct vb2_queue *q; + int ret; + + if (!isp_vdev || !isp_sdev || !isp_sdev->v4l2_dev) + return -EINVAL; + + v4l2_dev = isp_sdev->v4l2_dev; + vdev = &isp_vdev->vdev; + + isp_vdev->isp_sdev = isp_sdev; + isp_vdev->dev = v4l2_dev->dev; + isp_vdev->ops = ops; 
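+
+	/*
+	 * The remaining steps wire the capture path together: a vb2 queue
+	 * for buffer management, the 1920x1080 default format pushed down
+	 * to the ISP subdev, a single sink media pad, and finally the
+	 * video node registration that exposes the device to userspace.
+	 */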
+ + /* Initialize the vb2_queue struct */ + mutex_init(&isp_vdev->vbq_lock); + q = &isp_vdev->vbq; + q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + q->io_modes = VB2_MMAP | VB2_DMABUF; + q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + q->buf_struct_size = sizeof(struct isp4vid_capture_buffer); + q->min_queued_buffers = 2; + q->ops = &isp4vid_qops; + q->drv_priv = isp_vdev; + q->mem_ops = &isp4vid_vb2_memops; + q->lock = &isp_vdev->vbq_lock; + q->dev = v4l2_dev->dev; + ret = vb2_queue_init(q); + if (ret) { + dev_err(v4l2_dev->dev, "vb2_queue_init error:%d", ret); + return ret; + } + /* Initialize buffer list and its lock */ + mutex_init(&isp_vdev->buf_list_lock); + INIT_LIST_HEAD(&isp_vdev->buf_list); + + /* Set default frame format */ + isp_vdev->format = isp4vid_fmt_default; + isp_vdev->timeperframe = ISP4VID_ISP_TPF_DEFAULT; + v4l2_simplify_fraction(&isp_vdev->timeperframe.numerator, + &isp_vdev->timeperframe.denominator, 8, 333); + + ret = isp4vid_fill_buffer_size(&isp_vdev->format); + if (ret) { + dev_err(v4l2_dev->dev, "fail to fill buffer size: %d\n", ret); + goto err_release_vb2_queue; + } + + ret = isp4vid_set_fmt_2_isp(isp_sdev, &isp_vdev->format); + if (ret) { + dev_err(v4l2_dev->dev, "fail init format :%d\n", ret); + goto err_release_vb2_queue; + } + + /* Initialize the video_device struct */ + isp_vdev->vdev.entity.name = vdev_name; + isp_vdev->vdev.entity.function = MEDIA_ENT_F_IO_V4L; + isp_vdev->vdev_pad.flags = MEDIA_PAD_FL_SINK; + ret = media_entity_pads_init(&isp_vdev->vdev.entity, 1, + &isp_vdev->vdev_pad); + + if (ret) { + dev_err(v4l2_dev->dev, "init media entity pad fail:%d\n", ret); + goto err_release_vb2_queue; + } + + vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_STREAMING | V4L2_CAP_IO_MC; + vdev->entity.ops = &isp4vid_vdev_ent_ops; + vdev->release = video_device_release_empty; + vdev->fops = &isp4vid_vdev_fops; + vdev->ioctl_ops = &isp4vid_vdev_ioctl_ops; + vdev->lock = NULL; + vdev->queue = q; + vdev->v4l2_dev = v4l2_dev; + vdev->vfl_dir = VFL_DIR_RX; + strscpy(vdev->name, vdev_name, sizeof(vdev->name)); + video_set_drvdata(vdev, isp_vdev); + + ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); + if (ret) { + dev_err(v4l2_dev->dev, "register video device fail:%d\n", ret); + goto err_entity_cleanup; + } + + return 0; + +err_entity_cleanup: + media_entity_cleanup(&isp_vdev->vdev.entity); +err_release_vb2_queue: + vb2_queue_release(q); + return ret; +} + +void isp4vid_dev_deinit(struct isp4vid_dev *isp_vdev) +{ + vb2_video_unregister_device(&isp_vdev->vdev); +} diff --git a/drivers/media/platform/amd/isp4/isp4_video.h b/drivers/media/platform/amd/isp4/isp4_video.h new file mode 100644 index 000000000000..b87316d2a2e5 --- /dev/null +++ b/drivers/media/platform/amd/isp4/isp4_video.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. 
+ */ + +#ifndef _ISP4_VIDEO_H_ +#define _ISP4_VIDEO_H_ + +#include +#include + +#include "isp4_interface.h" + +struct isp4vid_capture_buffer { + /* + * struct vb2_v4l2_buffer must be the first element + * the videobuf2 framework will allocate this struct based on + * buf_struct_size and use the first sizeof(struct vb2_buffer) bytes of + * memory as a vb2_buffer + */ + struct vb2_v4l2_buffer vb2; + struct isp4if_img_buf_info img_buf; + struct list_head list; +}; + +struct isp4vid_ops { + int (*send_buffer)(struct v4l2_subdev *sd, + struct isp4if_img_buf_info *img_buf); +}; + +struct isp4vid_dev { + struct video_device vdev; + struct media_pad vdev_pad; + struct v4l2_pix_format format; + + /* mutex that protects vbq */ + struct mutex vbq_lock; + struct vb2_queue vbq; + + /* mutex that protects buf_list */ + struct mutex buf_list_lock; + struct list_head buf_list; + + u32 sequence; + bool stream_started; + + struct media_pipeline pipe; + struct device *dev; + struct v4l2_subdev *isp_sdev; + struct v4l2_fract timeperframe; + + /* Callback operations */ + const struct isp4vid_ops *ops; +}; + +int isp4vid_dev_init(struct isp4vid_dev *isp_vdev, + struct v4l2_subdev *isp_sdev, + const struct isp4vid_ops *ops); + +void isp4vid_dev_deinit(struct isp4vid_dev *isp_vdev); + +void isp4vid_handle_frame_done(struct isp4vid_dev *isp_vdev, + const struct isp4if_img_buf_info *img_buf); + +#endif /* _ISP4_VIDEO_H_ */ -- 2.52.0 From 308c15caa7a912377576bcc98c4d37f80964b13f Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:29 +0800 Subject: [PATCH 2/9] autofdo Signed-off-by: Eric Naim --- Makefile | 9 +++++---- arch/Kconfig | 11 +++++++++++ scripts/Makefile.autofdo | 12 +++++++++--- scripts/Makefile.lib | 9 ++++++--- scripts/Makefile.propeller | 20 ++++++++++++++++---- scripts/Makefile.vmlinux_o | 3 ++- 6 files changed, 49 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 3373308d2217..c8a588fef62c 100644 --- a/Makefile +++ b/Makefile @@ -1264,14 +1264,15 @@ PHONY += vmlinux # not for decompressors. LDFLAGS_vmlinux in arch/*/boot/compressed/Makefile is # unrelated; the decompressors just happen to have the same base name, # arch/*/boot/compressed/vmlinux. -# Export LDFLAGS_vmlinux only to scripts/Makefile.vmlinux. +# Export LDFLAGS_vmlinux only to scripts/Makefile.vmlinux, and +# scripts/Makefile.vmlinux_o. # # _LDFLAGS_vmlinux is a workaround for the 'private export' bug: # https://savannah.gnu.org/bugs/?61463 # For Make > 4.4, the following simple code will work: -# vmlinux: private export LDFLAGS_vmlinux := $(LDFLAGS_vmlinux) -vmlinux: private _LDFLAGS_vmlinux := $(LDFLAGS_vmlinux) -vmlinux: export LDFLAGS_vmlinux = $(_LDFLAGS_vmlinux) +# vmlinux vmlinux_o: private export LDFLAGS_vmlinux := $(LDFLAGS_vmlinux) +vmlinux vmlinux_o: private _LDFLAGS_vmlinux := $(LDFLAGS_vmlinux) +vmlinux vmlinux_o: export LDFLAGS_vmlinux = $(_LDFLAGS_vmlinux) vmlinux: vmlinux.o $(KBUILD_LDS) modpost $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux diff --git a/arch/Kconfig b/arch/Kconfig index 31220f512b16..1401fd81cfe0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -883,6 +883,17 @@ config AUTOFDO_CLANG If unsure, say N. +config AUTOFDO_PROFILE_ACCURATE + bool "Assert AutoFDO profile is accurate (EXPERIMENTAL)" + depends on AUTOFDO_CLANG + help + This option asserts that the AutoFDO profile (specified + in CLANG_AUTOFDO_PROFILE) is collected from a representative + workload, allowing the Clang compiler to perform more + aggressive optimizations. + + If unsure, say N. 
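+
+	  Concretely, this adds -fprofile-sample-accurate to the AutoFDO
+	  compiler flags (see scripts/Makefile.autofdo), telling Clang to
+	  treat functions absent from the profile as cold rather than
+	  unknown. As a rough sketch, with an illustrative profile path:
+
+	    make LLVM=1 CLANG_AUTOFDO_PROFILE=/path/to/kernel.afdo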
+ config ARCH_SUPPORTS_PROPELLER_CLANG bool diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo index 1caf2457e585..36abeae2accd 100644 --- a/scripts/Makefile.autofdo +++ b/scripts/Makefile.autofdo @@ -11,14 +11,20 @@ endif ifdef CLANG_AUTOFDO_PROFILE CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections CFLAGS_AUTOFDO_CLANG += -fsplit-machine-functions +ifdef CONFIG_AUTOFDO_PROFILE_ACCURATE + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-accurate +endif endif ifdef CONFIG_LTO_CLANG_THIN + _ldflags_autofdo := --mllvm=-enable-fs-discriminator=true --mllvm=-improved-fs-discriminator=true -plugin-opt=thinlto + _ldflags_autofdo += -plugin-opt=-split-machine-functions ifdef CLANG_AUTOFDO_PROFILE - KBUILD_LDFLAGS += --lto-sample-profile=$(CLANG_AUTOFDO_PROFILE) + _ldflags_autofdo += --lto-sample-profile=$(CLANG_AUTOFDO_PROFILE) endif - KBUILD_LDFLAGS += --mllvm=-enable-fs-discriminator=true --mllvm=-improved-fs-discriminator=true -plugin-opt=thinlto - KBUILD_LDFLAGS += -plugin-opt=-split-machine-functions + # TODO: change LDFLAGS_vmlinux to KBUILD_LDFLAGS when kernel modules + # are supported. + LDFLAGS_vmlinux += $(_ldflags_autofdo) endif export CFLAGS_AUTOFDO_CLANG diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 28a1c08e3b22..0906a02de885 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -108,20 +108,23 @@ endif # # Enable AutoFDO build flags except some files or directories we don't want to # enable (depends on variables AUTOFDO_PROFILE_obj.o and AUTOFDO_PROFILE). -# +# TODO: change '$(part-of-builtin)' to '$(is-kernel-object)' when the AutoFDO +# build supports modules. ifeq ($(CONFIG_AUTOFDO_CLANG),y) _c_flags += $(if $(patsubst n%,, \ - $(AUTOFDO_PROFILE_$(target-stem).o)$(AUTOFDO_PROFILE)$(is-kernel-object)), \ + $(AUTOFDO_PROFILE_$(target-stem).o)$(AUTOFDO_PROFILE)$(part-of-builtin)), \ $(CFLAGS_AUTOFDO_CLANG)) endif # # Enable Propeller build flags except some files or directories we don't want to # enable (depends on variables AUTOFDO_PROPELLER_obj.o and PROPELLER_PROFILE). +# TODO: change '$(part-of-builtin)' to '$(is-kernel-object)' when the Propeller +# build supports modules. # ifdef CONFIG_PROPELLER_CLANG _c_flags += $(if $(patsubst n%,, \ - $(AUTOFDO_PROFILE_$(target-stem).o)$(AUTOFDO_PROFILE)$(PROPELLER_PROFILE))$(is-kernel-object), \ + $(PROPELLER_PROFILE_$(target-stem).o)$(PROPELLER_PROFILE)$(part-of-builtin)), \ $(CFLAGS_PROPELLER_CLANG)) endif diff --git a/scripts/Makefile.propeller b/scripts/Makefile.propeller index 48a660128e25..8c3e514dbcc0 100644 --- a/scripts/Makefile.propeller +++ b/scripts/Makefile.propeller @@ -3,7 +3,7 @@ # Enable available and selected Clang Propeller features. ifdef CLANG_PROPELLER_PROFILE_PREFIX CFLAGS_PROPELLER_CLANG := -fbasic-block-sections=list=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt -ffunction-sections - KBUILD_LDFLAGS += --symbol-ordering-file=$(CLANG_PROPELLER_PROFILE_PREFIX)_ld_profile.txt --no-warn-symbol-ordering + _ldflags_propeller := --symbol-ordering-file=$(CLANG_PROPELLER_PROFILE_PREFIX)_ld_profile.txt --no-warn-symbol-ordering else # Starting with Clang v20, the '-fbasic-block-sections=labels' option is # deprecated. Use the recommended '-fbasic-block-address-map' option. @@ -13,8 +13,14 @@ else else CFLAGS_PROPELLER_CLANG := -fbasic-block-sections=labels endif + # Disable Machine Function Splitter for Propeller. + # Propeller profile generation requires Machine Function Splitter to be disabled. 
+ # We use -fno-split-machine-functions to override any prior AutoFDO/PGO flags + #(in Makefile.lib), assuming Propeller options are applied afterward. + CFLAGS_PROPELLER_CLANG += -fno-split-machine-functions endif + # Propeller requires debug information to embed module names in the profiles. # If CONFIG_DEBUG_INFO is not enabled, set -gmlt option. Skip this for AutoFDO, # as the option should already be set. @@ -26,14 +32,20 @@ endif ifdef CONFIG_LTO_CLANG_THIN ifdef CLANG_PROPELLER_PROFILE_PREFIX - KBUILD_LDFLAGS += --lto-basic-block-sections=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt + _ldflags_propeller += --lto-basic-block-sections=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt else ifeq ($(call test-ge, $(CONFIG_LLD_VERSION), 200000),y) - KBUILD_LDFLAGS += --lto-basic-block-address-map + _ldflags_propeller += --lto-basic-block-address-map else - KBUILD_LDFLAGS += --lto-basic-block-sections=labels + _ldflags_propeller += --lto-basic-block-sections=labels endif + # Again, we need to disable Machine Function Splitter. + LDFLAGS_vmlinux := $(filter-out -plugin-opt=-split-machine-functions, $(LDFLAGS_vmlinux)) endif endif +# TODO: change LDFLAGS_vmlinux to KBUILD_LDFLAGS when kernel modules +# are supported. +LDFLAGS_vmlinux += $(_ldflags_propeller) + export CFLAGS_PROPELLER_CLANG diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 527352c222ff..a5a18776b582 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -56,7 +56,8 @@ vmlinux-o-ld-args-$(CONFIG_BUILTIN_MODULE_RANGES) += -Map=$@.map quiet_cmd_ld_vmlinux.o = LD $@ cmd_ld_vmlinux.o = \ - $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ + $(LD) $(KBUILD_LDFLAGS) \ + $(filter-out -pie, $(LDFLAGS_vmlinux)) -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ --whole-archive vmlinux.a --no-whole-archive \ -- 2.52.0 From 0d20e99fbb074487283ea86722f36d3f2c4c0d6a Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:29 +0800 Subject: [PATCH 3/9] bbr3 Signed-off-by: Eric Naim --- include/linux/tcp.h | 6 +- include/net/inet_connection_sock.h | 4 +- include/net/tcp.h | 72 +- include/net/tcp_ecn.h | 6 +- include/uapi/linux/inet_diag.h | 23 + include/uapi/linux/rtnetlink.h | 4 +- include/uapi/linux/tcp.h | 1 + net/ipv4/Kconfig | 21 +- net/ipv4/bpf_tcp_ca.c | 4 +- net/ipv4/tcp.c | 3 + net/ipv4/tcp_bbr.c | 2233 +++++++++++++++++++++------- net/ipv4/tcp_cong.c | 1 + net/ipv4/tcp_input.c | 40 +- net/ipv4/tcp_minisocks.c | 2 + net/ipv4/tcp_output.c | 42 +- net/ipv4/tcp_rate.c | 30 +- net/ipv4/tcp_timer.c | 1 + 17 files changed, 1939 insertions(+), 554 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 20b8c6e21fef..e334b7a7aac2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -236,7 +236,8 @@ struct tcp_sock { tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ - recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ + recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ + fast_ack_mode:1;/* ack ASAP if >1 rcv_mss received? */ __cacheline_group_end(tcp_sock_read_txrx); /* RX read-mostly hotpath cache lines */ @@ -292,7 +293,8 @@ struct tcp_sock { * 0x5?10 << 16 + snd_wnd in net byte order */ u8 nonagle : 4,/* Disable Nagle algorithm? */ - rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ + rate_app_limited:1, /* rate_{delivered,interval_us} limited? 
*/ + tlp_orig_data_app_limited:1; /* app-limited before TLP rtx? */ u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ unused2:4; u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ecb362025c4e..9de884b7fe01 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -137,8 +137,8 @@ struct inet_connection_sock { u32 icsk_probes_tstamp; u32 icsk_user_timeout; - u64 icsk_ca_priv[104 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) +#define ICSK_CA_PRIV_SIZE (144) + u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)]; }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0deb5e9dd911..4e01320786b3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -406,6 +406,8 @@ static inline void tcp_dec_quickack_mode(struct sock *sk) #define TCP_ECN_DEMAND_CWR BIT(2) #define TCP_ECN_SEEN BIT(3) #define TCP_ECN_MODE_ACCECN BIT(4) +#define TCP_ECN_LOW BIT(5) +#define TCP_ECN_ECT_PERMANENT BIT(6) #define TCP_ECN_DISABLED 0 #define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) @@ -851,6 +853,15 @@ static inline unsigned long tcp_reqsk_timeout(struct request_sock *req) u32 tcp_delack_max(const struct sock *sk); +static inline void tcp_set_ecn_low_from_dst(struct sock *sk, + const struct dst_entry *dst) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (dst_feature(dst, RTAX_FEATURE_ECN_LOW)) + tp->ecn_flags |= TCP_ECN_LOW; +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(const struct sock *sk) { @@ -956,6 +967,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } +static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) +{ + return max_t(s32, t1 - t0, 0); +} + /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { @@ -1066,9 +1082,14 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - u64 first_tx_mstamp; + u32 first_tx_mstamp; /* when we reached the "delivered" count */ - u64 delivered_mstamp; + u32 delivered_mstamp; +#define TCPCB_IN_FLIGHT_BITS 20 +#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) + u32 in_flight:20, /* packets in flight at transmit */ + unused2:12; + u32 lost; /* packets lost so far upon tx of skb */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -1181,6 +1202,7 @@ enum tcp_ca_event { CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ + CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */ }; /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ @@ -1203,7 +1225,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED BIT(0) /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN BIT(1) -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). 
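+ * Unlike TCP_CONG_NEEDS_ECN, this does not require ECT on every packet;
+ * it only asks that per-ACK CE/no-CE transitions be delivered to the CA
+ * (BBR, for instance, uses them to maintain its ecn_alpha estimate).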
*/ +#define TCP_CONG_WANTS_CE_EVENTS BIT(2) +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; @@ -1223,10 +1249,13 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ + u32 prior_lost; /* tp->lost at "prior_mstamp" */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ + u32 tx_in_flight; /* packets in flight at starting timestamp */ + s32 lost; /* number of packets lost over interval */ s32 delivered; /* number of packets delivered over interval */ - s32 delivered_ce; /* number of packets delivered w/ CE marks*/ + s32 delivered_ce; /* packets delivered w/ CE mark over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ u32 snd_interval_us; /* snd interval for delivered packets */ u32 rcv_interval_us; /* rcv interval for delivered packets */ @@ -1237,7 +1266,9 @@ struct rate_sample { u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ + bool is_ece; /* did this ACK have ECN marked? */ }; struct tcp_congestion_ops { @@ -1261,8 +1292,11 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - u32 (*min_tso_segs)(struct sock *sk); + /* pick target number of segments per TSO/GSO skb (optional): */ + u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); + + /* react to a specific lost skb (optional) */ + void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) @@ -1328,6 +1362,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif +static inline bool tcp_ca_wants_ce_events(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | + TCP_CONG_WANTS_CE_EVENTS); +} + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1347,6 +1389,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) void tcp_set_ca_state(struct sock *sk, const u8 ca_state); /* From tcp_rate.c */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); @@ -1359,6 +1402,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } +/* If a retransmit failed due to local qdisc congestion or other local issues, + * then we may have called tcp_set_skb_tso_segs() to increase the number of + * segments in the skb without increasing the tx.in_flight. In all other cases, + * the tx.in_flight should be at least as big as the pcount of the sk_buff. We + * do not have the state to know whether a retransmit failed due to local qdisc + * congestion or other local issues, so to avoid spurious warnings we consider + * that any skb marked lost may have suffered that fate. 
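+ * Concretely, the only case flagged below is an skb whose pcount exceeds
+ * its recorded tx.in_flight while TCPCB_LOST was never set on it.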
+ */ +static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount, + u32 skb_sacked_flags, + u32 tx_in_flight) +{ + return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. @@ -2533,7 +2591,7 @@ struct tcp_plb_state { u8 consec_cong_rounds:5, /* consecutive congested rounds */ unused:3; u32 pause_until; /* jiffies32 when PLB can resume rerouting */ -}; +} __attribute__ ((__packed__)); static inline void tcp_plb_init(const struct sock *sk, struct tcp_plb_state *plb) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index f13e5cd2b1ac..bc5de05260eb 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -583,10 +583,9 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; + const struct dst_entry *dst = __sk_dst_get(sk); if (!use_ecn) { - const struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) use_ecn = true; } @@ -604,6 +603,9 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; } else { tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + + if (dst) + tcp_set_ecn_low_from_dst(sk, dst); } } } diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 86bb2e8b17c9..9d9a3eb2ce9b 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -229,6 +229,29 @@ struct tcp_bbr_info { __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ + __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ + __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ + __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ + __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ + __u8 bbr_mode; /* current bbr_mode in state machine */ + __u8 bbr_phase; /* current state machine phase */ + __u8 unused1; /* alignment padding; not used yet */ + __u8 bbr_version; /* BBR algorithm version */ + __u32 bbr_inflight_lo; /* lower short-term data volume bound */ + __u32 bbr_inflight_hi; /* higher long-term data volume bound */ + __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ +}; + +/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. 
*/ +enum tcp_bbr_phase { + BBR_PHASE_INVALID = 0, + BBR_PHASE_STARTUP = 1, + BBR_PHASE_DRAIN = 2, + BBR_PHASE_PROBE_RTT = 3, + BBR_PHASE_PROBE_BW_UP = 4, + BBR_PHASE_PROBE_BW_DOWN = 5, + BBR_PHASE_PROBE_BW_CRUISE = 6, + BBR_PHASE_PROBE_BW_REFILL = 7, }; union tcp_cc_info { diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index dab9493c791b..cce4975fdcfe 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -517,12 +517,14 @@ enum { #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ #define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) +#define RTAX_FEATURE_ECN_LOW (1 << 5) #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ RTAX_FEATURE_SACK | \ RTAX_FEATURE_TIMESTAMP | \ RTAX_FEATURE_ALLFRAG | \ - RTAX_FEATURE_TCP_USEC_TS) + RTAX_FEATURE_TCP_USEC_TS | \ + RTAX_FEATURE_ECN_LOW) struct rta_session { __u8 proto; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dce3113787a7..6efba4f74f6f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -185,6 +185,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ #define TCPI_OPT_TFO_CHILD 128 /* child from a Fast Open option on SYN */ +#define TCPI_OPT_ECN_LOW 256 /* Low-latency ECN enabled at conn init */ /* * Sender's congestion state indicating normal or abnormal situations diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b71c22475c51..85d95a59708e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -669,15 +669,18 @@ config TCP_CONG_BBR default n help - BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to - maximize network utilization and minimize queues. It builds an explicit - model of the bottleneck delivery rate and path round-trip propagation - delay. It tolerates packet loss and delay unrelated to congestion. It - can operate over LAN, WAN, cellular, wifi, or cable modem links. It can - coexist with flows that use loss-based congestion control, and can - operate with shallow buffers, deep buffers, bufferbloat, policers, or - AQM schemes that do not provide a delay signal. It requires the fq - ("Fair Queue") pacing packet scheduler. + BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a + model-based congestion control algorithm that aims to maximize + network utilization, keep queues and retransmit rates low, and to be + able to coexist with Reno/CUBIC in common scenarios. It builds an + explicit model of the network path. It tolerates a targeted degree + of random packet loss and delay. It can operate over LAN, WAN, + cellular, wifi, or cable modem links, and can use shallow-threshold + ECN signals. It can coexist to some degree with flows that use + loss-based congestion control, and can operate with shallow buffers, + deep buffers, bufferbloat, policers, or AQM schemes that do not + provide a delay signal. It requires pacing, using either TCP internal + pacing or the fq ("Fair Queue") pacing packet scheduler. 
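+
+	  For example, a common way to try it on a live system, using
+	  the fq qdisc for pacing (the device name is illustrative):
+
+	    sysctl -w net.ipv4.tcp_congestion_control=bbr
+	    tc qdisc replace dev eth0 root fq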
choice prompt "Default TCP congestion control" diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index e01492234b0b..27893b774e08 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -280,7 +280,7 @@ static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *samp { } -static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) +static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now) { return 0; } @@ -315,7 +315,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { .cwnd_event = bpf_tcp_ca_cwnd_event, .in_ack_event = bpf_tcp_ca_in_ack_event, .pkts_acked = bpf_tcp_ca_pkts_acked, - .min_tso_segs = bpf_tcp_ca_min_tso_segs, + .tso_segs = bpf_tcp_ca_tso_segs, .cong_control = bpf_tcp_ca_cong_control, .undo_cwnd = bpf_tcp_ca_undo_cwnd, .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d5319ebe2452..4e4c7ea4eb2e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3468,6 +3468,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; + tp->fast_ack_mode = 0; /* Clean up fastopen related fields */ @@ -4248,6 +4249,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; + if (tp->ecn_flags & TCP_ECN_LOW) + info->tcpi_options |= TCPI_OPT_ECN_LOW; if (tp->syn_data_acked) info->tcpi_options |= TCPI_OPT_SYN_DATA; if (tp->tcp_usec_ts) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 760941e55153..9279be755c16 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1,18 +1,19 @@ -/* Bottleneck Bandwidth and RTT (BBR) congestion control +/* BBR (Bottleneck Bandwidth and RTT) congestion control * - * BBR congestion control computes the sending rate based on the delivery - * rate (throughput) estimated from ACKs. In a nutshell: + * BBR is a model-based congestion control algorithm that aims for low queues, + * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the + * network path, it uses measurements of bandwidth and RTT, as well as (if they + * occur) packet loss and/or shallow-threshold ECN signals. Note that although + * it can use ECN or loss signals explicitly, it does not require either; it + * can bound its in-flight data based on its estimate of the BDP. * - * On each ACK, update our model of the network path: - * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) - * min_rtt = windowed_min(rtt, 10 seconds) - * pacing_rate = pacing_gain * bottleneck_bandwidth - * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) - * - * The core algorithm does not react directly to packet losses or delays, - * although BBR may adjust the size of next send per ACK when loss is - * observed, or adjust the sending rate if it estimates there is a - * traffic policer, in order to keep the drop rate reasonable. + * The model has both higher and lower bounds for the operating range: + * lo: bw_lo, inflight_lo: conservative short-term lower bound + * hi: bw_hi, inflight_hi: robust long-term upper bound + * The bandwidth-probing time scale is (a) extended dynamically based on + * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by + * an interactive wall-clock time-scale to be more scalable and responsive + * than Reno and CUBIC. 
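+ * (The tunables below implement this: bbr_bw_probe_max_rounds and
+ * bbr_bw_probe_rand_rounds for the round-count scale in (a), and
+ * bbr_bw_probe_base_us/bbr_bw_probe_rand_us for the wall-clock bound
+ * in (b).)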
* * Here is a state transition diagram for BBR: * @@ -65,6 +66,13 @@ #include #include +#include +#include "tcp_dctcp.h" + +#define BBR_VERSION 3 + +#define bbr_param(sk,name) (bbr_ ## name) + /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. @@ -85,36 +93,41 @@ enum bbr_mode { BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ }; +/* How does the incoming ACK stream relate to our bandwidth probing? */ +enum bbr_ack_phase { + BBR_ACKS_INIT, /* not probing; not getting probe feedback */ + BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ + BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ + BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ + BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ +}; + /* BBR congestion control block */ struct bbr { u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ u32 min_rtt_stamp; /* timestamp of min_rtt_us */ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ - struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ - u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 probe_rtt_min_us; /* min RTT in probe_rtt_win_ms win */ + u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ u64 cycle_mstamp; /* time of this cycle phase start */ - u32 mode:3, /* current bbr_mode in state machine */ + u32 mode:2, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ - packet_conservation:1, /* use packet conservation? */ round_start:1, /* start of packet-timed tx->ack round? */ + ce_state:1, /* If most recent data has CE bit set */ + bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ + try_fast_path:1, /* can we take fast path? */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:13, - lt_is_sampling:1, /* taking long-term ("LT") samples now? */ - lt_rtt_cnt:7, /* round trips in long-term interval */ - lt_use_bw:1; /* use lt_bw as our bw estimate? */ - u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ - u32 lt_last_delivered; /* LT intvl start: tp->delivered */ - u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ - u32 lt_last_lost; /* LT intvl start: tp->lost */ + init_cwnd:7, /* initial cwnd */ + unused_1:10; u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ full_bw_reached:1, /* reached full bw in Startup? */ full_bw_cnt:2, /* number of rounds without large bw gains */ - cycle_idx:3, /* current index in pacing_gain cycle array */ + cycle_idx:2, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ - unused_b:5; + unused_2:6; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ @@ -124,19 +137,67 @@ struct bbr { u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ extra_acked_win_idx:1, /* current index in extra_acked array */ - unused_c:6; + /* BBR v3 state: */ + full_bw_now:1, /* recently reached full bw plateau? */ + startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ + loss_in_cycle:1, /* packet loss in this cycle? 
*/ + ecn_in_cycle:1, /* ECN in this cycle? */ + unused_3:1; + u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ + u32 undo_bw_lo; /* bw_lo before latest losses */ + u32 undo_inflight_lo; /* inflight_lo before latest losses */ + u32 undo_inflight_hi; /* inflight_hi before latest losses */ + u32 bw_latest; /* max delivered bw in last round trip */ + u32 bw_lo; /* lower bound on sending bandwidth */ + u32 bw_hi[2]; /* max recent measured bw sample */ + u32 inflight_latest; /* max delivered data in last round trip */ + u32 inflight_lo; /* lower bound of inflight data range */ + u32 inflight_hi; /* upper bound of inflight data range */ + u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ + u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ + u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ + u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ + u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ + ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ + bw_probe_samples:1, /* rate samples reflect bw probing? */ + prev_probe_too_high:1, /* did last PROBE_UP go too high? */ + stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ + rounds_since_probe:8, /* packet-timed rounds since probed bw */ + loss_round_start:1, /* loss_round_delivered round trip? */ + loss_in_round:1, /* loss marked in this round trip? */ + ecn_in_round:1, /* ECN marked in this round trip? */ + ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ + loss_events_in_round:4,/* losses in STARTUP round */ + initialized:1; /* has bbr_init() been called? */ + u32 alpha_last_delivered; /* tp->delivered at alpha update */ + u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ + + u8 unused_4; /* to preserve alignment */ + struct tcp_plb_state plb; }; -#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ +struct bbr_context { + u32 sample_bw; +}; -/* Window length of bw filter (in rounds): */ -static const int bbr_bw_rtts = CYCLE_LEN + 2; /* Window length of min_rtt filter (in sec): */ static const u32 bbr_min_rtt_win_sec = 10; /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ static const u32 bbr_probe_rtt_mode_ms = 200; -/* Skip TSO below the following bandwidth (bits/sec): */ -static const int bbr_min_tso_rate = 1200000; +/* Window length of probe_rtt_min_us filter (in ms), and consequently the + * typical interval between PROBE_RTT mode entries. The default is 5000ms. + * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC + */ +static const u32 bbr_probe_rtt_win_ms = 5000; +/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */ +static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; + +/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. We cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. + */ +static const u32 bbr_tso_rtt_shift = 9; /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. 
* In order to help drive the network toward lower queues and low latency while @@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000; */ static const int bbr_pacing_margin_percent = 1; -/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain +/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value * that will allow a smoothly increasing pacing rate that will double each RTT * and send the same number of packets per RTT that an un-paced, slow-starting * Reno or CUBIC flow would: */ -static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; -/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain +static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1; +/* The gain for deriving startup cwnd: */ +static const int bbr_startup_cwnd_gain = BBR_UNIT * 2; +/* The pacing gain in BBR_DRAIN is calculated to typically drain * the queue created in BBR_STARTUP in a single round: */ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; @@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; static const int bbr_cwnd_gain = BBR_UNIT * 2; /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ static const int bbr_pacing_gain[] = { - BBR_UNIT * 5 / 4, /* probe for more available bw */ - BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ - BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ - BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ + BBR_UNIT * 5 / 4, /* UP: probe for more available bw */ + BBR_UNIT * 91 / 100, /* DOWN: drain queue and/or yield bw */ + BBR_UNIT, /* CRUISE: try to use pipe w/ some headroom */ + BBR_UNIT, /* REFILL: refill pipe to estimated 100% */ +}; +enum bbr_pacing_gain_phase { + BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ + BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ + BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ + BBR_BW_PROBE_REFILL = 3, /* refill the pipe again to 100% */ }; -/* Randomize the starting gain cycling phase over N phases: */ -static const u32 bbr_cycle_rand = 7; /* Try to keep at least this many packets in flight, if things go smoothly. For * smooth functioning, a sliding window protocol ACKing every other packet @@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7; */ static const u32 bbr_cwnd_min_target = 4; -/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */ /* If bw has increased significantly (1.25x), there may be more bw available: */ static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ static const u32 bbr_full_bw_cnt = 3; -/* "long-term" ("LT") bandwidth estimator parameters... 
*/ -/* The minimum number of rounds in an LT bw sampling interval: */ -static const u32 bbr_lt_intvl_min_rtts = 4; -/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ -static const u32 bbr_lt_loss_thresh = 50; -/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ -static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; -/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ -static const u32 bbr_lt_bw_diff = 4000 / 8; -/* If we estimate we're policed, use lt_bw for this many round trips: */ -static const u32 bbr_lt_bw_max_rtts = 48; - /* Gain factor for adding extra_acked to target cwnd: */ static const int bbr_extra_acked_gain = BBR_UNIT; /* Window length of extra_acked window. */ @@ -201,8 +256,123 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; /* Time period for clamping cwnd increment due to ack aggregation */ static const u32 bbr_extra_acked_max_us = 100 * 1000; +/* Flags to control BBR ECN-related behavior... */ + +/* Ensure ACKs only ACK packets with consistent ECN CE status? */ +static const bool bbr_precise_ece_ack = true; + +/* Max RTT (in usec) at which to use sender-side ECN logic. + * Disabled when 0 (ECN allowed at any RTT). + */ +static const u32 bbr_ecn_max_rtt_us = 5000; + +/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. + * No loss response when 0. + */ +static const u32 bbr_beta = BBR_UNIT * 30 / 100; + +/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */ +static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; + +/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly + * to congestion if the bottleneck is congested when the flow starts up. + */ +static const u32 bbr_ecn_alpha_init = BBR_UNIT; + +/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. + * No ECN based bounding when 0. + */ +static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ + +/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. + * Scaled by BBR_SCALE. Disabled when 0. + */ +static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ + +/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN + * clears then make the first round's increment to inflight_hi the following + * fraction of inflight_hi. + */ +static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2; + +/* Estimate bw probing has gone too far if loss rate exceeds this level. */ +static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ + +/* Slow down for a packet loss recovered by TLP? */ +static const bool bbr_loss_probe_recovery = true; + +/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, + * and loss rate is higher than bbr_loss_thresh. + * Disabled if 0. + */ +static const u32 bbr_full_loss_cnt = 6; + +/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh + * meets this count. + */ +static const u32 bbr_full_ecn_cnt = 2; + +/* Fraction of unutilized headroom to try to leave in path upon high loss. */ +static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; + +/* How much do we increase cwnd_gain when probing for bandwidth in + * BBR_BW_PROBE_UP? This specifies the increment in units of + * BBR_UNIT/4. The default is 1, meaning 0.25. + * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75). + */ +static const u32 bbr_bw_probe_cwnd_gain = 1; + +/* Max number of packet-timed rounds to wait before probing for bandwidth. 
If + * we want to tolerate 1% random loss per round, and not have this cut our + * inflight too much, we must probe for bw periodically on roughly this scale. + * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. + * We aim to be fair with Reno/CUBIC up to a BDP of at least: + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + */ +static const u32 bbr_bw_probe_max_rounds = 63; + +/* Max amount of randomness to inject in round counting for Reno-coexistence. + */ +static const u32 bbr_bw_probe_rand_rounds = 2; + +/* Use BBR-native probe time scale starting at this many usec. + * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: + * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs + */ +static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ + +/* Use BBR-native probes spread over this many usec: */ +static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ + +/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ +static const bool bbr_fast_path = true; + +/* Use fast ack mode? */ +static const bool bbr_fast_ack_mode = true; + +static u32 bbr_max_bw(const struct sock *sk); +static u32 bbr_bw(const struct sock *sk); +static void bbr_exit_probe_rtt(struct sock *sk); +static void bbr_reset_congestion_signals(struct sock *sk); +static void bbr_run_loss_probe_recovery(struct sock *sk); + static void bbr_check_probe_rtt_done(struct sock *sk); +/* This connection can use ECN if both endpoints have signaled ECN support in + * the handshake and the per-route settings indicated this is a + * shallow-threshold ECN environment, meaning both: + * (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and + * (b) TCP endpoints provide precise ACKs that only ACK data segments + * with consistent ECN CE status + */ +static bool bbr_can_use_ecn(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return tcp_ecn_mode_any(tp) && + (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW); +} + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -214,17 +384,17 @@ static bool bbr_full_bw_reached(const struct sock *sk) /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ static u32 bbr_max_bw(const struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); + const struct bbr *bbr = inet_csk_ca(sk); - return minmax_get(&bbr->bw); + return max(bbr->bw_hi[0], bbr->bw_hi[1]); } /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ static u32 bbr_bw(const struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); + const struct bbr *bbr = inet_csk_ca(sk); - return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); + return min(bbr_max_bw(sk), bbr->bw_lo); } /* Return maximum extra acked in past k-2k round trips, @@ -241,15 +411,23 @@ static u16 bbr_extra_acked(const struct sock *sk) * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. 
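 * (Sanity check at that limit: 2.9 Tbit/s is ~242 pkts/usec of 1500-byte
 * packets, i.e. ~2^32 after the 2^24 scaling; multiplying by mss and a
 * gain of ~2.89 * 2^8, shifting right by 8, then multiplying by ~10^6
 * usec/sec lands just under 2^64.)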
*/ -static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, + int margin) { unsigned int mss = tcp_sk(sk)->mss_cache; rate *= mss; rate *= gain; rate >>= BBR_SCALE; - rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); - return rate >> BW_SCALE; + rate *= USEC_PER_SEC / 100 * (100 - margin); + rate >>= BW_SCALE; + rate = max(rate, 1ULL); + return rate; +} + +static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) +{ + return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); } /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ @@ -257,12 +435,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) { u64 rate = bw; - rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = bbr_rate_bytes_per_sec(sk, rate, gain, + bbr_pacing_margin_percent); rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); return rate; } -/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -279,7 +458,8 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); WRITE_ONCE(sk->sk_pacing_rate, - bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); + bbr_bw_to_pacing_rate(sk, bw, + bbr_param(sk, startup_pacing_gain))); } /* Pace using current bw estimate and a gain factor. */ @@ -295,26 +475,48 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) WRITE_ONCE(sk->sk_pacing_rate, rate); } -/* override sysctl_tcp_min_tso_segs */ -__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) +/* Return the number of segments BBR would like in a TSO/GSO skb, given a + * particular max gso size as a constraint. TODO: make this simpler and more + * consistent by switching bbr to just call tcp_tso_autosize(). + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 segs, r; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); + + /* Budget a TSO/GSO burst size allowance based on min_rtt. For every + * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. + * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) + */ + if (bbr_param(sk, tso_rtt_shift)) { + r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift); + if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ + bytes += GSO_LEGACY_MAX_SIZE >> r; + } + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) { - return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); } +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 segs, bytes; - - /* Sort of tcp_tso_autosize() but ignoring - * driver provided sk_gso_max_size. 
- */ - bytes = min_t(unsigned long, - READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), - GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - return min(segs, 0x7FU); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -334,7 +536,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - if (event == CA_EVENT_TX_START && tp->app_limited) { + if (event == CA_EVENT_TX_START) { + if (!tp->app_limited) + return; bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; @@ -345,6 +549,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); else if (bbr->mode == BBR_PROBE_RTT) bbr_check_probe_rtt_done(sk); + } else if ((event == CA_EVENT_ECN_IS_CE || + event == CA_EVENT_ECN_NO_CE) && + bbr_can_use_ecn(sk) && + bbr_param(sk, precise_ece_ack)) { + u32 state = bbr->ce_state; + dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); + bbr->ce_state = state; + } else if (event == CA_EVENT_TLP_RECOVERY && + bbr_param(sk, loss_probe_recovery)) { + bbr_run_loss_probe_recovery(sk); } } @@ -367,10 +581,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) * default. This should only happen when the connection is not using TCP * timestamps and has retransmitted all of the SYN/SYNACK/data packets * ACKed so far. In this case, an RTO can cut cwnd to 1, in which - * case we need to slow-start up toward something safe: TCP_INIT_CWND. + * case we need to slow-start up toward something safe: initial cwnd. */ if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ - return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + return bbr->init_cwnd; /* be safe: cap at initial cwnd */ w = (u64)bw * bbr->min_rtt_us; @@ -387,23 +601,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) * - one skb in sending host Qdisc, * - one skb in sending host TSO/GSO engine * - one skb being received by receiver host LRO/GRO/delayed-ACK engine - * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because - * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * Don't worry, at low rates this won't bloat cwnd because + * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets, * which allows 2 outstanding 2-packet sequences, to try to keep pipe * full even with ACK-every-other-packet delayed ACKs. */ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) { struct bbr *bbr = inet_csk_ca(sk); + u32 tso_segs_goal; - /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr_tso_segs_goal(sk); - - /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ - cwnd = (cwnd + 1) & ~1U; + tso_segs_goal = 3 * bbr_tso_segs_goal(sk); + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd = max_t(u32, cwnd, tso_segs_goal); + cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); /* Ensure gain cycling gets inflight above BDP even for small BDPs. 
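
To make the fixed-point units in bbr_bdp() concrete: bw carries packets per
usec scaled by 2^24 (BW_SCALE), so bw * min_rtt_us is packets scaled by 2^24,
and gain is an 8-bit fixed-point factor. A standalone sketch of the same
arithmetic, with the scale constants restated here as assumptions matching
their definitions earlier in this file:

#include <stdint.h>

#define BW_SCALE  24
#define BW_UNIT   (1ULL << BW_SCALE)
#define BBR_SCALE 8
#define BBR_UNIT  (1u << BBR_SCALE)

/* cwnd = ceil(bw * min_rtt * gain), everything in fixed point. */
static uint32_t bdp_packets(uint32_t bw, uint32_t min_rtt_us, uint32_t gain)
{
	uint64_t w = (uint64_t)bw * min_rtt_us;		/* pkts << BW_SCALE */

	return (uint32_t)((((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT);
}

For example, 100 Mbit/s of 1514-byte packets is ~0.00826 packets/usec, i.e.
bw ~= 138500 in this encoding; with min_rtt_us = 20000 and gain = BBR_UNIT
the sketch returns 166, the ~165-packet BDP rounded up.
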
*/ - if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) cwnd += 2; return cwnd; @@ -458,10 +672,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) { u32 max_aggr_cwnd, aggr_cwnd = 0; - if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { + if (bbr_param(sk, extra_acked_gain)) { max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) / BW_UNIT; - aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) + aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk)) >> BBR_SCALE; aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); } @@ -469,66 +683,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) return aggr_cwnd; } -/* An optimization in BBR to reduce losses: On the first round of recovery, we - * follow the packet conservation principle: send P packets per P packets acked. - * After that, we slow-start and send at most 2*P packets per P packets acked. - * After recovery finishes, or upon undo, we restore the cwnd we had when - * recovery started (capped by the target cwnd based on estimated BDP). - * - * TODO(ycheng/ncardwell): implement a rate-based approach. - */ -static bool bbr_set_cwnd_to_recover_or_restore( - struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +/* Returns the cwnd for PROBE_RTT mode. */ +static u32 bbr_probe_rtt_cwnd(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; - u32 cwnd = tcp_snd_cwnd(tp); - - /* An ACK for P pkts should release at most 2*P packets. We do this - * in two steps. First, here we deduct the number of lost packets. - * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. - */ - if (rs->losses > 0) - cwnd = max_t(s32, cwnd - rs->losses, 1); - - if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { - /* Starting 1st round of Recovery, so do packet conservation. */ - bbr->packet_conservation = 1; - bbr->next_rtt_delivered = tp->delivered; /* start round now */ - /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ - cwnd = tcp_packets_in_flight(tp) + acked; - } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { - /* Exiting loss recovery; restore cwnd saved before recovery. */ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->packet_conservation = 0; - } - bbr->prev_ca_state = state; - - if (bbr->packet_conservation) { - *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); - return true; /* yes, using packet conservation */ - } - *new_cwnd = cwnd; - return false; + return max_t(u32, bbr_param(sk, cwnd_min_target), + bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain))); } /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss * has drawn us down below target), or snap down to target if we're above it. 
*/ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, - u32 acked, u32 bw, int gain) + u32 acked, u32 bw, int gain, u32 cwnd, + struct bbr_context *ctx) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; + u32 target_cwnd = 0; if (!acked) goto done; /* no packet fully ACKed; just apply caps */ - if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) - goto done; - target_cwnd = bbr_bdp(sk, bw, gain); /* Increment the cwnd to account for excess ACKed data that seems @@ -537,74 +712,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, target_cwnd += bbr_ack_aggregation_cwnd(sk); target_cwnd = bbr_quantization_budget(sk, target_cwnd); - /* If we're below target cwnd, slow start cwnd toward target cwnd. */ - if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ - cwnd = min(cwnd + acked, target_cwnd); - else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) - cwnd = cwnd + acked; - cwnd = max(cwnd, bbr_cwnd_min_target); + /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ + bbr->try_fast_path = 0; + if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ + cwnd += acked; + if (cwnd >= target_cwnd) { + cwnd = target_cwnd; + bbr->try_fast_path = 1; + } + } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { + cwnd += acked; + } else { + bbr->try_fast_path = 1; + } + cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); done: - tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* global cap */ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ - tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); -} - -/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ -static bool bbr_is_next_cycle_phase(struct sock *sk, - const struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - bool is_full_length = - tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > - bbr->min_rtt_us; - u32 inflight, bw; - - /* The pacing_gain of 1.0 paces at the estimated bw to try to fully - * use the pipe without increasing the queue. - */ - if (bbr->pacing_gain == BBR_UNIT) - return is_full_length; /* just use wall clock time */ - - inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); - bw = bbr_max_bw(sk); - - /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at - * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is - * small (e.g. on a LAN). We do not persist if packets are lost, since - * a path with small buffers may not hold that much. - */ - if (bbr->pacing_gain > BBR_UNIT) - return is_full_length && - (rs->losses || /* perhaps pacing_gain*BDP won't fit */ - inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); - - /* A pacing_gain < 1.0 tries to drain extra queue we added if bw - * probing didn't find more bw. If inflight falls to match BDP then we - * estimate queue is drained; persisting would underutilize the pipe. 
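
Restated as a standalone sketch (plain integers and illustrative names), the
cwnd policy in bbr_set_cwnd() above is: grow by newly ACKed packets until the
target is hit, then latch the target and flag that the fast path may be used:

#include <stdint.h>
#include <stdbool.h>

struct cwnd_state {
	uint32_t cwnd;
	bool try_fast_path;
};

static void update_cwnd(struct cwnd_state *s, uint32_t acked,
			uint32_t target_cwnd, bool full_bw_reached,
			uint32_t init_cwnd, uint32_t cwnd_min_target)
{
	s->try_fast_path = false;
	if (full_bw_reached) {			/* only cut once pipe is full */
		s->cwnd += acked;
		if (s->cwnd >= target_cwnd) {
			s->cwnd = target_cwnd;
			s->try_fast_path = true;
		}
	} else if (s->cwnd < target_cwnd || s->cwnd < 2 * init_cwnd) {
		s->cwnd += acked;		/* still growing toward target */
	} else {
		s->try_fast_path = true;
	}
	if (s->cwnd < cwnd_min_target)
		s->cwnd = cwnd_min_target;
}
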
- */ - return is_full_length || - inflight <= bbr_inflight(sk, bw, BBR_UNIT); -} - -static void bbr_advance_cycle_phase(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); - bbr->cycle_mstamp = tp->delivered_mstamp; -} - -/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ -static void bbr_update_cycle_phase(struct sock *sk, - const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - - if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) - bbr_advance_cycle_phase(sk); + tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp), + bbr_probe_rtt_cwnd(sk))); } static void bbr_reset_startup_mode(struct sock *sk) @@ -614,191 +741,49 @@ static void bbr_reset_startup_mode(struct sock *sk) bbr->mode = BBR_STARTUP; } -static void bbr_reset_probe_bw_mode(struct sock *sk) -{ - struct bbr *bbr = inet_csk_ca(sk); - - bbr->mode = BBR_PROBE_BW; - bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); - bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ -} - -static void bbr_reset_mode(struct sock *sk) -{ - if (!bbr_full_bw_reached(sk)) - bbr_reset_startup_mode(sk); - else - bbr_reset_probe_bw_mode(sk); -} - -/* Start a new long-term sampling interval. */ -static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); - bbr->lt_last_delivered = tp->delivered; - bbr->lt_last_lost = tp->lost; - bbr->lt_rtt_cnt = 0; -} - -/* Completely reset long-term bandwidth sampling. */ -static void bbr_reset_lt_bw_sampling(struct sock *sk) -{ - struct bbr *bbr = inet_csk_ca(sk); - - bbr->lt_bw = 0; - bbr->lt_use_bw = 0; - bbr->lt_is_sampling = false; - bbr_reset_lt_bw_sampling_interval(sk); -} - -/* Long-term bw sampling interval is done. Estimate whether we're policed. */ -static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) -{ - struct bbr *bbr = inet_csk_ca(sk); - u32 diff; - - if (bbr->lt_bw) { /* do we have bw from a previous interval? */ - /* Is new bw close to the lt_bw from the previous interval? */ - diff = abs(bw - bbr->lt_bw); - if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || - (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= - bbr_lt_bw_diff)) { - /* All criteria are met; estimate we're policed. */ - bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ - bbr->lt_use_bw = 1; - bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ - bbr->lt_rtt_cnt = 0; - return; - } - } - bbr->lt_bw = bw; - bbr_reset_lt_bw_sampling_interval(sk); -} - -/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of - * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and - * explicitly models their policed rate, to reduce unnecessary losses. We - * estimate that we're policed if we see 2 consecutive sampling intervals with - * consistent throughput and high packet loss. If we think we're being policed, - * set lt_bw to the "long-term" average delivery rate from those 2 intervals. +/* See if we have reached next round trip. Upon start of the new round, + * returns packets delivered since previous round start plus this ACK. 
*/ -static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 lost, delivered; - u64 bw; - u32 t; - - if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ - if (bbr->mode == BBR_PROBE_BW && bbr->round_start && - ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { - bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ - bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ - } - return; - } - - /* Wait for the first loss before sampling, to let the policer exhaust - * its tokens and estimate the steady-state rate allowed by the policer. - * Starting samples earlier includes bursts that over-estimate the bw. - */ - if (!bbr->lt_is_sampling) { - if (!rs->losses) - return; - bbr_reset_lt_bw_sampling_interval(sk); - bbr->lt_is_sampling = true; - } - - /* To avoid underestimates, reset sampling if we run out of data. */ - if (rs->is_app_limited) { - bbr_reset_lt_bw_sampling(sk); - return; - } - - if (bbr->round_start) - bbr->lt_rtt_cnt++; /* count round trips in this interval */ - if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) - return; /* sampling interval needs to be longer */ - if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { - bbr_reset_lt_bw_sampling(sk); /* interval is too long */ - return; - } - - /* End sampling interval when a packet is lost, so we estimate the - * policer tokens were exhausted. Stopping the sampling before the - * tokens are exhausted under-estimates the policed rate. - */ - if (!rs->losses) - return; - - /* Calculate packets lost and delivered in sampling interval. */ - lost = tp->lost - bbr->lt_last_lost; - delivered = tp->delivered - bbr->lt_last_delivered; - /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ - if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) - return; - - /* Find average delivery rate in this sampling interval. */ - t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; - if ((s32)t < 1) - return; /* interval is less than one ms, so wait */ - /* Check if can multiply without overflow */ - if (t >= ~0U / USEC_PER_MSEC) { - bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ - return; - } - t *= USEC_PER_MSEC; - bw = (u64)delivered * BW_UNIT; - do_div(bw, t); - bbr_lt_bw_interval_done(sk, bw); -} - -/* Estimate the bandwidth based on how fast packets are delivered */ -static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +static u32 bbr_update_round_start(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw; + u32 round_delivered = 0; bbr->round_start = 0; - if (rs->delivered < 0 || rs->interval_us <= 0) - return; /* Not a valid observation */ /* See if we've reached the next RTT */ - if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + if (rs->interval_us > 0 && + !before(rs->prior_delivered, bbr->next_rtt_delivered)) { + round_delivered = tp->delivered - bbr->next_rtt_delivered; bbr->next_rtt_delivered = tp->delivered; - bbr->rtt_cnt++; bbr->round_start = 1; - bbr->packet_conservation = 0; } + return round_delivered; +} - bbr_lt_bw_sampling(sk, rs); +/* Calculate the bandwidth based on how fast packets are delivered */ +static void bbr_calculate_bw_sample(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + u64 bw = 0; /* Divide delivered by the interval to find a (lower bound) bottleneck * bandwidth sample. 
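
A "round" in bbr_update_round_start() above is packet-timed: it ends once the
connection's cumulative delivered counter catches up to the value it had when
the round's first packet left. A simplified sketch with plain integers (the
kernel code uses the sequence-wrap-safe before() comparison instead of >=):

#include <stdint.h>
#include <stdbool.h>

struct round_tracker {
	uint64_t next_rtt_delivered;	/* delivered count that ends the round */
	bool round_start;
};

static uint32_t note_ack(struct round_tracker *rt, uint64_t prior_delivered,
			 uint64_t delivered_now)
{
	uint32_t round_delivered = 0;

	rt->round_start = false;
	if (prior_delivered >= rt->next_rtt_delivered) {
		round_delivered = (uint32_t)(delivered_now -
					     rt->next_rtt_delivered);
		rt->next_rtt_delivered = delivered_now;	/* new round starts */
		rt->round_start = true;
	}
	return round_delivered;
}
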
Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. + * Round up to allow growth at low rates, even with integer division. */ - bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); - - /* If this sample is application-limited, it is likely to have a very - * low delivered count that represents application behavior rather than - * the available network rate. Such a sample could drag down estimated - * bw, causing needless slow-down. Thus, to continue to send at the - * last measured network rate, we filter out app-limited samples unless - * they describe the path bw at least as well as our bw model. - * - * So the goal during app-limited phase is to proceed with the best - * network rate no matter how long. We automatically leave this - * phase when app writes faster than the network can deliver :) - */ - if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { - /* Incorporate new sample into our max bw filter. */ - minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + if (rs->interval_us > 0) { + if (WARN_ONCE(rs->delivered < 0, + "negative delivered: %d interval_us: %ld\n", + rs->delivered, rs->interval_us)) + return; + + bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); } + + ctx->sample_bw = bw; } /* Estimates the windowed max degree of ack aggregation. @@ -812,7 +797,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). * Max filter is an approximate sliding window of 5-10 (packet timed) round - * trips. + * trips for non-startup phase, and 1-2 round trips for startup. */ static void bbr_update_ack_aggregation(struct sock *sk, const struct rate_sample *rs) @@ -820,15 +805,19 @@ static void bbr_update_ack_aggregation(struct sock *sk, u32 epoch_us, expected_acked, extra_acked; struct bbr *bbr = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts); - if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || + if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 || rs->delivered < 0 || rs->interval_us <= 0) return; if (bbr->round_start) { bbr->extra_acked_win_rtts = min(0x1F, bbr->extra_acked_win_rtts + 1); - if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { + if (!bbr_full_bw_reached(sk)) + extra_acked_win_rtts_thresh = 1; + if (bbr->extra_acked_win_rtts >= + extra_acked_win_rtts_thresh) { bbr->extra_acked_win_rtts = 0; bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? 0 : 1; @@ -862,49 +851,6 @@ static void bbr_update_ack_aggregation(struct sock *sk, bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } -/* Estimate when the pipe is full, using the change in delivery rate: BBR - * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by - * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited - * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the - * higher rwin, 3: we get higher delivery rate samples. Or transient - * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar - * design goal, but uses delay and inter-ACK spacing instead of bandwidth. 
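
The core of the aggregation estimate in bbr_update_ack_aggregation() is the
gap between data actually ACKed in the epoch and what the bandwidth model
predicts should have been ACKed. A minimal sketch in the same
pkts/usec << BW_SCALE encoding (constant restated as an assumption):

#include <stdint.h>

#define BW_SCALE 24

/* extra_acked: how far the ACK stream ran ahead of bw * elapsed time. */
static uint32_t extra_acked_sample(uint32_t bw, uint32_t epoch_us,
				   uint32_t acked_in_epoch)
{
	uint64_t expected = ((uint64_t)bw * epoch_us) >> BW_SCALE;

	if (acked_in_epoch <= expected)
		return 0;
	return (uint32_t)(acked_in_epoch - expected);	/* aggregated ACKs */
}
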
- */ -static void bbr_check_full_bw_reached(struct sock *sk, - const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - u32 bw_thresh; - - if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) - return; - - bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; - if (bbr_max_bw(sk) >= bw_thresh) { - bbr->full_bw = bbr_max_bw(sk); - bbr->full_bw_cnt = 0; - return; - } - ++bbr->full_bw_cnt; - bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; -} - -/* If pipe is probably full, drain the queue and then enter steady-state. */ -static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - - if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { - bbr->mode = BBR_DRAIN; /* drain queue we created */ - tcp_sk(sk)->snd_ssthresh = - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); - } /* fall through to check if in-flight is already small: */ - if (bbr->mode == BBR_DRAIN && - bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) - bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ -} - static void bbr_check_probe_rtt_done(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -914,9 +860,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk) after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) return; - bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); - bbr_reset_mode(sk); + bbr_exit_probe_rtt(sk); } /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and @@ -942,23 +888,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bool filter_expired; + bool probe_rtt_expired, min_rtt_expired; + u32 expire; - /* Track min RTT seen in the min_rtt_win_sec filter window: */ - filter_expired = after(tcp_jiffies32, - bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. 
*/ + expire = bbr->probe_rtt_min_stamp + + msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms)); + probe_rtt_expired = after(tcp_jiffies32, expire); if (rs->rtt_us >= 0 && - (rs->rtt_us < bbr->min_rtt_us || - (filter_expired && !rs->is_ack_delayed))) { - bbr->min_rtt_us = rs->rtt_us; - bbr->min_rtt_stamp = tcp_jiffies32; + (rs->rtt_us < bbr->probe_rtt_min_us || + (probe_rtt_expired && !rs->is_ack_delayed))) { + bbr->probe_rtt_min_us = rs->rtt_us; + bbr->probe_rtt_min_stamp = tcp_jiffies32; + } + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ; + min_rtt_expired = after(tcp_jiffies32, expire); + if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || + min_rtt_expired) { + bbr->min_rtt_us = bbr->probe_rtt_min_us; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; } - if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; } if (bbr->mode == BBR_PROBE_RTT) { @@ -967,9 +925,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; /* Maintain min packets in flight for max(200 ms, 1 round). */ if (!bbr->probe_rtt_done_stamp && - tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { bbr->probe_rtt_done_stamp = tcp_jiffies32 + - msecs_to_jiffies(bbr_probe_rtt_mode_ms); + msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms)); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; } else if (bbr->probe_rtt_done_stamp) { @@ -990,18 +948,20 @@ static void bbr_update_gains(struct sock *sk) switch (bbr->mode) { case BBR_STARTUP: - bbr->pacing_gain = bbr_high_gain; - bbr->cwnd_gain = bbr_high_gain; + bbr->pacing_gain = bbr_param(sk, startup_pacing_gain); + bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); break; case BBR_DRAIN: - bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ - bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ + bbr->pacing_gain = bbr_param(sk, drain_gain); /* slow, to drain */ + bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); /* keep cwnd */ break; case BBR_PROBE_BW: - bbr->pacing_gain = (bbr->lt_use_bw ? - BBR_UNIT : - bbr_pacing_gain[bbr->cycle_idx]); - bbr->cwnd_gain = bbr_cwnd_gain; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->cwnd_gain = bbr_param(sk, cwnd_gain); + if (bbr_param(sk, bw_probe_cwnd_gain) && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr->cwnd_gain += + BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4; break; case BBR_PROBE_RTT: bbr->pacing_gain = BBR_UNIT; @@ -1013,144 +973,1387 @@ static void bbr_update_gains(struct sock *sk) } } -static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) { - bbr_update_bw(sk, rs); - bbr_update_ack_aggregation(sk, rs); - bbr_update_cycle_phase(sk, rs); - bbr_check_full_bw_reached(sk, rs); - bbr_check_drain(sk, rs); - bbr_update_min_rtt(sk, rs); - bbr_update_gains(sk); + /* Provision 3 * cwnd since BBR may slow-start even during recovery. 
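
The two nested filters above can be read as: the short window times the next
PROBE_RTT episode, and the long window only ever adopts the short window's
minimum, so the path model's min_rtt cannot go stale. A simplified sketch
with plain timestamps, ignoring the delayed-ACK exclusion (window lengths are
illustrative parameters):

#include <stdint.h>
#include <stdbool.h>

struct min_rtt_filters {
	uint32_t probe_rtt_min_us, probe_rtt_min_stamp;
	uint32_t min_rtt_us, min_rtt_stamp;
};

static void rtt_sample(struct min_rtt_filters *f, uint32_t now,
		       uint32_t rtt_us, uint32_t probe_rtt_win,
		       uint32_t min_rtt_win)
{
	if (rtt_us < f->probe_rtt_min_us ||
	    now > f->probe_rtt_min_stamp + probe_rtt_win) {
		f->probe_rtt_min_us = rtt_us;
		f->probe_rtt_min_stamp = now;
	}
	if (f->probe_rtt_min_us <= f->min_rtt_us ||
	    now > f->min_rtt_stamp + min_rtt_win) {
		f->min_rtt_us = f->probe_rtt_min_us;
		f->min_rtt_stamp = f->probe_rtt_min_stamp;
	}
}
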
*/ + return 3; } -__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) +/* Incorporate a new bw sample into the current window of our max filter. */ +static void bbr_take_max_bw_sample(struct sock *sk, u32 bw) { struct bbr *bbr = inet_csk_ca(sk); - u32 bw; - - bbr_update_model(sk, rs); - bw = bbr_bw(sk); - bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); + bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); } -__bpf_kfunc static void bbr_init(struct sock *sk) +/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ +static void bbr_advance_max_bw_filter(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bbr->prior_cwnd = 0; - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - bbr->rtt_cnt = 0; - bbr->next_rtt_delivered = tp->delivered; - bbr->prev_ca_state = TCP_CA_Open; - bbr->packet_conservation = 0; - - bbr->probe_rtt_done_stamp = 0; - bbr->probe_rtt_round_done = 0; - bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_jiffies32; - - minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + if (!bbr->bw_hi[1]) + return; /* no samples in this window; remember old window */ + bbr->bw_hi[0] = bbr->bw_hi[1]; + bbr->bw_hi[1] = 0; +} - bbr->has_seen_rtt = 0; - bbr_init_pacing_rate_from_rtt(sk); +/* Reset the estimator for reaching full bandwidth based on bw plateau. */ +static void bbr_reset_full_bw(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); - bbr->round_start = 0; - bbr->idle_restart = 0; - bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr->cycle_mstamp = 0; - bbr->cycle_idx = 0; - bbr_reset_lt_bw_sampling(sk); - bbr_reset_startup_mode(sk); + bbr->full_bw_now = 0; +} - bbr->ack_epoch_mstamp = tp->tcp_mstamp; - bbr->ack_epoch_acked = 0; - bbr->extra_acked_win_rtts = 0; - bbr->extra_acked_win_idx = 0; - bbr->extra_acked[0] = 0; - bbr->extra_acked[1] = 0; +/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ +static u32 bbr_target_inflight(struct sock *sk) +{ + u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); - cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); + return min(bdp, tcp_sk(sk)->snd_cwnd); } -__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) +static bool bbr_is_probing_bandwidth(struct sock *sk) { - /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ - return 3; + struct bbr *bbr = inet_csk_ca(sk); + + return (bbr->mode == BBR_STARTUP) || + (bbr->mode == BBR_PROBE_BW && + (bbr->cycle_idx == BBR_BW_PROBE_REFILL || + bbr->cycle_idx == BBR_BW_PROBE_UP)); +} + +/* Has the given amount of time elapsed since we marked the phase start? */ +static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + + return tcp_stamp_us_delta(tp->tcp_mstamp, + bbr->cycle_mstamp + interval_us) > 0; +} + +static void bbr_handle_queue_too_high_in_startup(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bdp; /* estimated BDP in packets, with quantization budget */ + + bbr->full_bw_reached = 1; + + bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr->inflight_hi = max(bdp, bbr->inflight_latest); +} + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. 
*/ +static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || + !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh)) + return; + + if (ce_ratio >= bbr_param(sk, ecn_thresh)) + bbr->startup_ecn_rounds++; + else + bbr->startup_ecn_rounds = 0; + + if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) { + bbr_handle_queue_too_high_in_startup(sk); + return; + } +} + +/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */ +static int bbr_update_ecn_alpha(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + struct bbr *bbr = inet_csk_ca(sk); + s32 delivered, delivered_ce; + u64 alpha, ce_ratio; + u32 gain; + bool want_ecn_alpha; + + /* See if we should use ECN sender logic for this connection. */ + if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) && + !!bbr_param(sk, ecn_factor) && + (bbr->min_rtt_us <= bbr_ecn_max_rtt_us || + !bbr_ecn_max_rtt_us)) + bbr->ecn_eligible = 1; + + /* Skip updating alpha only if not ECN-eligible and PLB is disabled. */ + want_ecn_alpha = (bbr->ecn_eligible || + (bbr_can_use_ecn(sk) && + READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))); + if (!want_ecn_alpha) + return -1; + + delivered = tp->delivered - bbr->alpha_last_delivered; + delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; + + if (delivered == 0 || /* avoid divide by zero */ + WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ + return -1; + + BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE); + ce_ratio = (u64)delivered_ce << BBR_SCALE; + do_div(ce_ratio, delivered); + + gain = bbr_param(sk, ecn_alpha_gain); + alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; + alpha += (gain * ce_ratio) >> BBR_SCALE; + bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); + + bbr->alpha_last_delivered = tp->delivered; + bbr->alpha_last_delivered_ce = tp->delivered_ce; + + bbr_check_ecn_too_high_in_startup(sk, ce_ratio); + return (int)ce_ratio; } -/* In theory BBR does not need to undo the cwnd since it does not - * always reduce cwnd on losses (see bbr_main()). Keep it for now. +/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6 + * flow label) if it encounters sustained congestion in the form of ECN marks. */ -__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) +static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->round_start && ce_ratio >= 0) + tcp_plb_update_state(sk, &bbr->plb, ce_ratio); + + tcp_plb_check_rehash(sk, &bbr->plb); +} + +/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ +static void bbr_raise_inflight_hi_slope(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 growth_this_round, cnt; + + /* Calculate "slope": packets S/Acked per inflight_hi increment. */ + growth_this_round = 1 << bbr->bw_probe_up_rounds; + bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); + cnt = tcp_snd_cwnd(tp) / growth_this_round; + cnt = max(cnt, 1U); + bbr->bw_probe_up_cnt = cnt; +} + +/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. 
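
The alpha update in bbr_update_ecn_alpha() above is a DCTCP-style EWMA in the
same 8-bit fixed point used throughout this file. A standalone restatement
(the gain mentioned in the note below is an assumed example):

#include <stdint.h>

#define BBR_SCALE 8
#define BBR_UNIT  (1u << BBR_SCALE)

/* alpha = (1 - gain) * alpha + gain * ce_ratio, all scaled by BBR_UNIT. */
static uint32_t ewma_ecn_alpha(uint32_t alpha, uint32_t ce_ratio, uint32_t gain)
{
	uint64_t next = ((uint64_t)(BBR_UNIT - gain) * alpha) >> BBR_SCALE;

	next += ((uint64_t)gain * ce_ratio) >> BBR_SCALE;
	return next > BBR_UNIT ? BBR_UNIT : (uint32_t)next;
}

With gain g (as a fraction of BBR_UNIT, e.g. 1/16), alpha moves a fraction g
of the way toward each new ce_ratio, so the smoothing horizon is roughly 1/g
update intervals.
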
+ */
+static void bbr_probe_inflight_hi_upward(struct sock *sk,
+					 const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 delta;
+
+	if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi)
+		return;  /* not fully using inflight_hi, so don't grow it */
+
+	/* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */
+	bbr->bw_probe_up_acks += rs->acked_sacked;
+	if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) {
+		delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt;
+		bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt;
+		bbr->inflight_hi += delta;
+		bbr->try_fast_path = 0;  /* Need to update cwnd */
+	}
+
+	if (bbr->round_start)
+		bbr_raise_inflight_hi_slope(sk);
+}
+
+/* Does loss/ECN rate for this sample say inflight is "too high"?
+ * This is used both by the bbr_check_loss_too_high_in_startup() function
+ * and in PROBE_UP.
+ */
+static bool bbr_is_inflight_too_high(const struct sock *sk,
+				     const struct rate_sample *rs)
+{
+	const struct bbr *bbr = inet_csk_ca(sk);
+	u32 loss_thresh, ecn_thresh;
+
+	if (rs->lost > 0 && rs->tx_in_flight) {
+		loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >>
+				BBR_SCALE;
+		if (rs->lost > loss_thresh) {
+			return true;
+		}
+	}
+
+	if (rs->delivered_ce > 0 && rs->delivered > 0 &&
+	    bbr->ecn_eligible && !!bbr_param(sk, ecn_thresh)) {
+		ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >>
+				BBR_SCALE;
+		if (rs->delivered_ce > ecn_thresh) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Calculate the tx_in_flight level that corresponded to excessive loss.
+ * We find "lost_prefix" segs of the skb where loss rate went too high,
+ * by solving for "lost_prefix" in the following equation:
+ *   lost / inflight >= loss_thresh
+ *   (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
+ * Then we take that equation, convert it to fixed point, and
+ * round up to the nearest packet.
+ */
+static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk,
+					 const struct rate_sample *rs,
+					 const struct sk_buff *skb)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	u32 loss_thresh = bbr_param(sk, loss_thresh);
+	u32 pcount, divisor, inflight_hi;
+	s32 inflight_prev, lost_prev;
+	u64 loss_budget, lost_prefix;
+
+	pcount = tcp_skb_pcount(skb);
+
+	/* How much data was in flight before this skb? */
+	inflight_prev = rs->tx_in_flight - pcount;
+	if (inflight_prev < 0) {
+		WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
+				pcount,
+				TCP_SKB_CB(skb)->sacked,
+				rs->tx_in_flight),
+			  "tx_in_flight: %u pcount: %u reneg: %u",
+			  rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg);
+		return ~0U;
+	}
+
+	/* How much inflight data was marked lost before this skb? */
+	lost_prev = rs->lost - pcount;
+	if (WARN_ONCE(lost_prev < 0,
+		      "cwnd: %u ca: %d out: %u lost: %u pif: %u "
+		      "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d "
+		      "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u",
+		      tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state,
+		      tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp),
+		      rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost,
+		      rs->lost, lost_prev, pcount,
+		      TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+		      tp->is_sack_reneg))
+		return ~0U;
+
+	/* At what prefix of this lost skb did loss rate exceed loss_thresh?
+	 */
+	loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
+	loss_budget >>= BBR_SCALE;
+	if (lost_prev >= loss_budget) {
+		lost_prefix = 0;	/* previous losses crossed loss_thresh */
+	} else {
+		lost_prefix = loss_budget - lost_prev;
+		lost_prefix <<= BBR_SCALE;
+		divisor = BBR_UNIT - loss_thresh;
+		if (WARN_ON_ONCE(!divisor))  /* loss_thresh is 8 bits */
+			return ~0U;
+		do_div(lost_prefix, divisor);
+	}
+
+	inflight_hi = inflight_prev + lost_prefix;
+	return inflight_hi;
+}
+
+/* If loss/ECN rates during probing indicated we may have overfilled a
+ * buffer, return an operating point that tries to leave unutilized headroom in
+ * the path for other flows, for fairness convergence and lower RTTs and loss.
+ */
+static u32 bbr_inflight_with_headroom(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 headroom, headroom_fraction;
+
+	if (bbr->inflight_hi == ~0U)
+		return ~0U;
+
+	headroom_fraction = bbr_param(sk, inflight_headroom);
+	headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
+	headroom = max(headroom, 1U);
+	return max_t(s32, bbr->inflight_hi - headroom,
+		     bbr_param(sk, cwnd_min_target));
+}
+
+/* Bound cwnd to a sensible level, based on our current probing state
+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
+ */
+static void bbr_bound_cwnd_for_inflight_model(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 cap;
+
+	/* tcp_rcv_synsent_state_process() currently calls tcp_ack()
+	 * and thus cong_control() without first initializing us(!).
+	 */
+	if (!bbr->initialized)
+		return;
+
+	cap = ~0U;
+	if (bbr->mode == BBR_PROBE_BW &&
+	    bbr->cycle_idx != BBR_BW_PROBE_CRUISE) {
+		/* Probe to see if more packets fit in the path. */
+		cap = bbr->inflight_hi;
+	} else {
+		if (bbr->mode == BBR_PROBE_RTT ||
+		    (bbr->mode == BBR_PROBE_BW &&
+		     bbr->cycle_idx == BBR_BW_PROBE_CRUISE))
+			cap = bbr_inflight_with_headroom(sk);
+	}
+	/* Adapt to any loss/ECN since our last bw probe. */
+	cap = min(cap, bbr->inflight_lo);
+
+	cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target));
+	tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp)));
+}
+
+/* How should we multiplicatively cut bw or inflight limits based on ECN? */
+static u32 bbr_ecn_cut(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return BBR_UNIT -
+		((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE);
+}
+
+/* Init lower bounds if we have not inited them yet. */
+static void bbr_init_lower_bounds(struct sock *sk, bool init_bw)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (init_bw && bbr->bw_lo == ~0U)
+		bbr->bw_lo = bbr_max_bw(sk);
+	if (bbr->inflight_lo == ~0U)
+		bbr->inflight_lo = tcp_snd_cwnd(tp);
+}
+
+/* Reduce bw and inflight to (1 - beta). */
+static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 loss_cut = BBR_UNIT - bbr_param(sk, beta);
+
+	*bw = max_t(u32, bbr->bw_latest,
+		    (u64)bbr->bw_lo * loss_cut >> BBR_SCALE);
+	*inflight = max_t(u32, bbr->inflight_latest,
+			  (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE);
+}
+
+/* Reduce inflight to (1 - alpha*ecn_factor).
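
Solving the comment's equation for lost_prefix gives
lost_prefix = (loss_thresh * inflight_prev - lost_prev) / (1 - loss_thresh);
loss_budget above is the numerator's first term, rounded up in fixed point.
A floating-point cross-check of the same algebra (the 2% threshold is an
illustrative assumption):

#include <stdio.h>

/* Solve (lost_prev + p) / (inflight_prev + p) = thresh for p. */
static double lost_prefix(double inflight_prev, double lost_prev, double thresh)
{
	double p = (thresh * inflight_prev - lost_prev) / (1.0 - thresh);

	return p > 0 ? p : 0;	/* earlier losses already crossed thresh */
}

int main(void)
{
	/* e.g. 100 pkts in flight before this skb, 1 already lost, 2%: */
	printf("%.2f\n", lost_prefix(100, 1, 0.02));	/* ~1.02 packets */
	return 0;
}
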
*/ +static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_cut = bbr_ecn_cut(sk); + + *inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; +} + +/* Estimate a short-term lower bound on the capacity available now, based + * on measurements of the current delivery process and recent history. When we + * are seeing loss/ECN at times when we are not probing bw, then conservatively + * move toward flow balance by multiplicatively cutting our short-term + * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a + * multiplicative decrease in order to converge to a lower capacity in time + * logarithmic in the magnitude of the decrease. + * + * However, we do not cut our short-term estimates lower than the current rate + * and volume of delivered data from this round trip, since from the current + * delivery process we can estimate the measured capacity available now. + * + * Anything faster than that approach would knowingly risk high loss, which can + * cause low bw for Reno/CUBIC and high loss recovery latency for + * request/response flows using any congestion control. + */ +static void bbr_adapt_lower_bounds(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_inflight_lo = ~0U; + + /* We only use lower-bound estimates when not probing bw. + * When probing we need to push inflight higher to probe bw. + */ + if (bbr_is_probing_bandwidth(sk)) + return; + + /* ECN response. */ + if (bbr->ecn_in_round && !!bbr_param(sk, ecn_factor)) { + bbr_init_lower_bounds(sk, false); + bbr_ecn_lower_bounds(sk, &ecn_inflight_lo); + } + + /* Loss response. */ + if (bbr->loss_in_round) { + bbr_init_lower_bounds(sk, true); + bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo); + } + + /* Adjust to the lower of the levels implied by loss/ECN. */ + bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); + bbr->bw_lo = max(1U, bbr->bw_lo); +} + +/* Reset any short-term lower-bound adaptation to congestion, so that we can + * push our inflight up. + */ +static void bbr_reset_lower_bounds(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_lo = ~0U; + bbr->inflight_lo = ~0U; +} + +/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state + * machine phase where we adapt our lower bound based on congestion signals. + */ +static void bbr_reset_congestion_signals(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->loss_in_cycle = 0; + bbr->ecn_in_cycle = 0; + bbr->bw_latest = 0; + bbr->inflight_latest = 0; +} + +static void bbr_exit_loss_recovery(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); + bbr->try_fast_path = 0; /* bound cwnd using latest model */ +} + +/* Update rate and volume of delivered data from latest round trip. 
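
The loss response above is multiplicative decrease with a floor: cut the
short-term bound by (1 - beta), but never below what the current round trip
actually delivered. A standalone sketch (the 30% beta in the comment below is
an assumed example value):

#include <stdint.h>

#define BBR_SCALE 8
#define BBR_UNIT  (1u << BBR_SCALE)

/* e.g. beta = BBR_UNIT * 30 / 100: repeated lossy rounds converge the
 * bound downward geometrically, while the latest delivered rate or
 * volume provides a measured floor.
 */
static uint32_t cut_lower_bound(uint32_t lo, uint32_t latest, uint32_t beta)
{
	uint32_t cut = (uint32_t)(((uint64_t)lo * (BBR_UNIT - beta)) >> BBR_SCALE);

	return cut > latest ? cut : latest;
}
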
*/ +static void bbr_update_latest_delivery_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_round_start = 0; + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + + bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); + bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); + + if (!before(rs->prior_delivered, bbr->loss_round_delivered)) { + bbr->loss_round_delivered = tp->delivered; + bbr->loss_round_start = 1; /* mark start of new round trip */ + } +} + +/* Once per round, reset filter for latest rate and volume of delivered data. */ +static void bbr_advance_latest_delivery_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* If ACK matches a TLP retransmit, persist the filter. If we detect + * that a TLP retransmit plugged a tail loss, we'll want to remember + * how much data the path delivered before the tail loss. + */ + if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) { + bbr->bw_latest = ctx->sample_bw; + bbr->inflight_latest = rs->delivered; + } +} + +/* Update (most of) our congestion signals: track the recent rate and volume of + * delivered data, presence of loss, and EWMA degree of ECN marking. + */ +static void bbr_update_congestion_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + bw = ctx->sample_bw; + + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) + bbr_take_max_bw_sample(sk, bw); + + bbr->loss_in_round |= (rs->losses > 0); + + if (!bbr->loss_round_start) + return; /* skip the per-round-trip updates */ + /* Now do per-round-trip updates. */ + bbr_adapt_lower_bounds(sk, rs); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; +} + +/* Bandwidth probing can cause loss. To help coexistence with loss-based + * congestion control we spread out our probing in a Reno-conscious way. Due to + * the shape of the Reno sawtooth, the time required between loss epochs for an + * idealized Reno flow is a number of round trips that is the BDP of that + * flow. We count packet-timed round trips directly, since measured RTT can + * vary widely, and Reno is driven by packet-timed round trips. + */ +static bool bbr_is_reno_coexistence_probe_time(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 rounds; + + /* Random loss can shave some small percentage off of our inflight + * in each round. To survive this, flows need robust periodic probes. + */ + rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk)); + return bbr->rounds_since_probe >= rounds; +} + +/* How long do we want to wait before probing for bandwidth (and risking + * loss)? We randomize the wait, for better mixing and fairness convergence. + * + * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. 
+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, + * (eg 4K video to a broadband user): + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + * + * We bound the BBR-native inter-bw-probe wall clock time to be: + * (a) higher than 2 sec: to try to avoid causing loss for a long enough time + * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must + * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs + * (b) lower than 3 sec: to ensure flows can start probing in a reasonable + * amount of time to discover unutilized bw on human-scale interactive + * time-scales (e.g. perhaps traffic from a web page download that we + * were competing with is now complete). + */ +static void bbr_pick_probe_wait(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Decide the random round-trip bound for wait until probe: */ + bbr->rounds_since_probe = + get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds)); + /* Decide the random wall clock bound for wait until probe: */ + bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) + + get_random_u32_below(bbr_param(sk, bw_probe_rand_us)); +} + +static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = cycle_idx; + /* New phase, so need to update cwnd and pacing rate. */ + bbr->try_fast_path = 0; +} + +/* Send at estimated bw to fill the pipe, but not queue. We need this phase + * before PROBE_UP, because as soon as we send faster than the available bw + * we will start building a queue, and if the buffer is shallow we can cause + * loss. If we do not fill the pipe before we cause this loss, our bw_hi and + * inflight_hi estimates will underestimate. + */ +static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_lower_bounds(sk); + bbr->bw_probe_up_rounds = bw_probe_up_rounds; + bbr->bw_probe_up_acks = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_REFILLING; + bbr->next_rtt_delivered = tp->delivered; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); +} + +/* Now probe max deliverable data rate and volume. */ +static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->ack_phase = BBR_ACKS_PROBE_STARTING; + bbr->next_rtt_delivered = tp->delivered; + bbr->cycle_mstamp = tp->tcp_mstamp; + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP); + bbr_raise_inflight_hi_slope(sk); +} + +/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall + * clock time at which to probe beyond an inflight that we think to be + * safe. This will knowingly risk packet loss, so we want to do this rarely, to + * keep packet loss rates low. Also start a round-trip counter, to probe faster + * if we estimate a Reno flow at our BDP would probe faster. 
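
Putting the two bounds together: a flow re-probes when either the randomized
wall-clock wait or the Reno-coexistence round count elapses, whichever fires
first. A simplified sketch (the 63-round cap mirrors bbr_bw_probe_max_rounds
above; names are illustrative):

#include <stdint.h>
#include <stdbool.h>

static bool time_to_probe_bw(uint64_t us_in_phase, uint64_t probe_wait_us,
			     uint32_t rounds_since_probe,
			     uint32_t target_inflight)
{
	/* Reno at our BDP would see loss roughly once per BDP rounds. */
	uint32_t round_bound = target_inflight < 63 ? target_inflight : 63;

	return us_in_phase > probe_wait_us ||
	       rounds_since_probe >= round_bound;
}
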
+ */ +static void bbr_start_bw_probe_down(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_congestion_signals(sk); + bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ + bbr_pick_probe_wait(sk); + bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); +} + +/* Cruise: maintain what we estimate to be a neutral, conservative + * operating point, without attempting to probe up for bandwidth or down for + * RTT, and only reducing inflight in response to loss/ECN signals. + */ +static void bbr_start_bw_probe_cruise(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->inflight_lo != ~0U) + bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); + + bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); +} + +/* Loss and/or ECN rate is too high while probing. + * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. + */ +static void bbr_handle_inflight_too_high(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + const u32 beta = bbr_param(sk, beta); + + bbr->prev_probe_too_high = 1; + bbr->bw_probe_samples = 0; /* only react once per probe */ + /* If we are app-limited then we are not robustly + * probing the max volume of inflight data we think + * might be safe (analogous to how app-limited bw + * samples are not known to be robustly probing bw). + */ + if (!rs->is_app_limited) { + bbr->inflight_hi = max_t(u32, rs->tx_in_flight, + (u64)bbr_target_inflight(sk) * + (BBR_UNIT - beta) >> BBR_SCALE); + } + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr_start_bw_probe_down(sk); +} + +/* If we're seeing bw and loss samples reflecting our bw probing, adapt + * using the signals we see. If loss or ECN mark rate gets too high, then adapt + * inflight_hi downward. If we're able to push inflight higher without such + * signals, push higher: adapt inflight_hi upward. + */ +static bool bbr_adapt_upper_bounds(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Track when we'll see bw/loss samples resulting from our bw probes. */ + if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) + bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; + if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { + /* End of samples from bw probing phase. */ + bbr->bw_probe_samples = 0; + bbr->ack_phase = BBR_ACKS_INIT; + /* At this point in the cycle, our current bw sample is also + * our best recent chance at finding the highest available bw + * for this flow. So now is the best time to forget the bw + * samples from the previous cycle, by advancing the window. + */ + if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) + bbr_advance_max_bw_filter(sk); + /* If we had an inflight_hi, then probed and pushed inflight all + * the way up to hit that inflight_hi without seeing any + * high loss/ECN in all the resulting ACKs from that probing, + * then probe up again, this time letting inflight persist at + * inflight_hi for a round trip, then accelerating beyond. 
+ */ + if (bbr->mode == BBR_PROBE_BW && + bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { + bbr_start_bw_probe_refill(sk, 0); + return true; /* yes, decided state transition */ + } + } + if (bbr_is_inflight_too_high(sk, rs)) { + if (bbr->bw_probe_samples) /* sample is from bw probing? */ + bbr_handle_inflight_too_high(sk, rs); + } else { + /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ + + if (bbr->inflight_hi == ~0U) + return false; /* no excess queue signals yet */ + + /* To be resilient to random loss, we must raise bw/inflight_hi + * if we observe in any phase that a higher level is safe. + */ + if (rs->tx_in_flight > bbr->inflight_hi) { + bbr->inflight_hi = rs->tx_in_flight; + } + + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr_probe_inflight_hi_upward(sk, rs); + } + + return false; +} + +/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ +static bool bbr_check_time_to_probe_bw(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 n; + + /* If we seem to be at an operating point where we are not seeing loss + * but we are seeing ECN marks, then when the ECN marks cease we reprobe + * quickly (in case cross-traffic has ceased and freed up bw). + */ + if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible && + bbr->ecn_in_cycle && !bbr->loss_in_cycle && + inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { + /* Calculate n so that when bbr_raise_inflight_hi_slope() + * computes growth_this_round as 2^n it will be roughly the + * desired volume of data (inflight_hi*ecn_reprobe_gain). + */ + n = ilog2((((u64)bbr->inflight_hi * + bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE)); + bbr_start_bw_probe_refill(sk, n); + return true; + } + + if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) || + bbr_is_reno_coexistence_probe_time(sk)) { + bbr_start_bw_probe_refill(sk, 0); + return true; + } + return false; +} + +/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ +static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) +{ + /* Always need to pull inflight down to leave headroom in queue. */ + if (inflight > bbr_inflight_with_headroom(sk)) + return false; + + return inflight <= bbr_inflight(sk, bw, BBR_UNIT); +} + +/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) { + struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); + bool is_bw_probe_done = false; + u32 inflight, bw; + + if (!bbr_full_bw_reached(sk)) + return; + + /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ + if (bbr_adapt_upper_bounds(sk, rs, ctx)) + return; /* already decided state transition */ + + if (bbr->mode != BBR_PROBE_BW) + return; + + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); + bw = bbr_max_bw(sk); + + switch (bbr->cycle_idx) { + /* First we spend most of our time cruising with a pacing_gain of 1.0, + * which paces at the estimated bw, to try to fully use the pipe + * without building queue. If we encounter loss/ECN marks, we adapt + * by slowing down. + */ + case BBR_BW_PROBE_CRUISE: + if (bbr_check_time_to_probe_bw(sk, rs)) + return; /* already decided state transition */ + break; + + /* After cruising, when it's time to probe, we first "refill": we send + * at the estimated bw to fill the pipe, before probing higher and + * knowingly risking overflowing the bottleneck buffer (causing loss). 
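
For orientation before the remaining cases, this is the nominal PROBE_BW
cycle, summarized from the code's own comments (loss/ECN can force UP back to
DOWN early, and the probe timer can pull DOWN straight into REFILL); the enum
and helper names here are illustrative, not from the patch:

enum probe_bw_phase { PROBE_DOWN, PROBE_CRUISE, PROBE_REFILL, PROBE_UP };

static enum probe_bw_phase next_phase(enum probe_bw_phase p)
{
	switch (p) {
	case PROBE_DOWN:   return PROBE_CRUISE;	/* queue drained */
	case PROBE_CRUISE: return PROBE_REFILL;	/* probe timer fired */
	case PROBE_REFILL: return PROBE_UP;	/* one round of pipe-filling */
	case PROBE_UP:     return PROBE_DOWN;	/* probe done; drain queue */
	}
	return PROBE_DOWN;
}
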
+ */ + case BBR_BW_PROBE_REFILL: + if (bbr->round_start) { + /* After one full round trip of sending in REFILL, we + * start to see bw samples reflecting our REFILL, which + * may be putting too much data in flight. + */ + bbr->bw_probe_samples = 1; + bbr_start_bw_probe_up(sk, ctx); + } + break; - bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to + * probe for bw. If we have not seen loss/ECN, we try to raise inflight + * to at least pacing_gain*BDP; note that this may take more than + * min_rtt if min_rtt is small (e.g. on a LAN). + * + * We terminate PROBE_UP bandwidth probing upon any of the following: + * + * (1) We've pushed inflight up to hit the inflight_hi target set in the + * most recent previous bw probe phase. Thus we want to start + * draining the queue immediately because it's very likely the most + * recently sent packets will fill the queue and cause drops. + * (2) If inflight_hi has not limited bandwidth growth recently, and + * yet delivered bandwidth has not increased much recently + * (bbr->full_bw_now). + * (3) Loss filter says loss rate is "too high". + * (4) ECN filter says ECN mark rate is "too high". + * + * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high() + */ + case BBR_BW_PROBE_UP: + if (bbr->prev_probe_too_high && + inflight >= bbr->inflight_hi) { + bbr->stopped_risky_probe = 1; + is_bw_probe_done = true; + } else { + if (tp->is_cwnd_limited && + tcp_snd_cwnd(tp) >= bbr->inflight_hi) { + /* inflight_hi is limiting bw growth */ + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + } else if (bbr->full_bw_now) { + /* Plateau in estimated bw. Pipe looks full. */ + is_bw_probe_done = true; + } + } + if (is_bw_probe_done) { + bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ + bbr_start_bw_probe_down(sk); /* restart w/ down */ + } + break; + + /* After probing in PROBE_UP, we have usually accumulated some data in + * the bottleneck buffer (if bw probing didn't find more bw). We next + * enter PROBE_DOWN to try to drain any excess data from the queue. To + * do this, we use a pacing_gain < 1.0. We hold this pacing gain until + * our inflight is less then that target cruising point, which is the + * minimum of (a) the amount needed to leave headroom, and (b) the + * estimated BDP. Once inflight falls to match the target, we estimate + * the queue is drained; persisting would underutilize the pipe. + */ + case BBR_BW_PROBE_DOWN: + if (bbr_check_time_to_probe_bw(sk, rs)) + return; /* already decided state transition */ + if (bbr_check_time_to_cruise(sk, inflight, bw)) + bbr_start_bw_probe_cruise(sk); + break; + + default: + WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); + } +} + +/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */ +static void bbr_exit_probe_rtt(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_lower_bounds(sk); + if (bbr_full_bw_reached(sk)) { + bbr->mode = BBR_PROBE_BW; + /* Raising inflight after PROBE_RTT may cause loss, so reset + * the PROBE_BW clock and schedule the next bandwidth probe for + * a friendly and randomized future point in time. + */ + bbr_start_bw_probe_down(sk); + /* Since we are exiting PROBE_RTT, we know inflight is + * below our estimated BDP, so it is reasonable to cruise. + */ + bbr_start_bw_probe_cruise(sk); + } else { + bbr->mode = BBR_STARTUP; + } +} + +/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. 
Wait until + * the end of the round in recovery to get a good estimate of how many packets + * have been lost, and how many we need to drain with a low pacing rate. + */ +static void bbr_check_loss_too_high_in_startup(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk)) + return; + + /* For STARTUP exit, check the loss rate at the end of each round trip + * of Recovery episodes in STARTUP. We check the loss rate at the end + * of the round trip to filter out noisy/low loss and have a better + * sense of inflight (extent of loss), so we can drain more accurately. + */ + if (rs->losses && bbr->loss_events_in_round < 0xf) + bbr->loss_events_in_round++; /* update saturating counter */ + if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start && + inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && + bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) && + bbr_is_inflight_too_high(sk, rs)) { + bbr_handle_queue_too_high_in_startup(sk); + return; + } + if (bbr->loss_round_start) + bbr->loss_events_in_round = 0; +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates bw probing filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh, full_cnt, thresh; + + if (bbr->full_bw_now || rs->is_app_limited) + return; + + thresh = bbr_param(sk, full_bw_thresh); + full_cnt = bbr_param(sk, full_bw_cnt); + bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE; + if (ctx->sample_bw >= bw_thresh) { + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + return; + } + if (!bbr->round_start) + return; + ++bbr->full_bw_cnt; + bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt; + bbr->full_bw_reached |= bbr->full_bw_now; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + /* Set ssthresh to export purely for monitoring, to signal + * completion of initial STARTUP by setting to a non- + * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR). 
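+ * (For example, once STARTUP completes, diagnostic tools such as
+ * "ss -ti" will report ssthresh equal to the estimated BDP in packets.)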
+ */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
+ bbr_reset_congestion_signals(sk);
+ } /* fall through to check if in-flight is already small: */
+ if (bbr->mode == BBR_DRAIN &&
+ bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) {
+ bbr->mode = BBR_PROBE_BW;
+ bbr_start_bw_probe_down(sk);
+ }
+}
+
+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs,
+ struct bbr_context *ctx)
+{
+ bbr_update_congestion_signals(sk, rs, ctx);
+ bbr_update_ack_aggregation(sk, rs);
+ bbr_check_loss_too_high_in_startup(sk, rs);
+ bbr_check_full_bw_reached(sk, rs, ctx);
+ bbr_check_drain(sk, rs, ctx);
+ bbr_update_cycle_phase(sk, rs, ctx);
+ bbr_update_min_rtt(sk, rs);
+}
+
+/* Fast path for app-limited case.
+ *
+ * On each ack, we execute bbr state machine, which primarily consists of:
+ * 1) update model based on new rate sample, and
+ * 2) update control based on updated model or state change.
+ *
+ * There are certain workloads/scenarios, e.g. app-limited case, where
+ * either we can skip updating model or we can skip update of both model
+ * as well as control. This provides significant softirq cpu savings for
+ * processing incoming acks.
+ *
+ * In case of app-limited, if there is no congestion (loss/ecn) and
+ * if observed bw sample is less than current estimated bw, then we can
+ * skip some of the computation in bbr state processing:
+ *
+ * - if there is no rtt/mode/phase change: In this case, since all the
+ * parameters of the network model are constant, we can skip model
+ * as well as control update.
+ *
+ * - else we can skip rest of the model update. But we still need to
+ * update the control to account for the new rtt/mode/phase.
+ *
+ * Returns whether we can take fast path or not.
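+ *
+ * Example scenario (illustrative, not from the patch): an RPC flow that
+ * is app-limited between bursts, with no loss or ECN in the round and a
+ * bw sample below bbr_max_bw(), keeps mode and min_rtt unchanged, so
+ * both the model and the control updates can be skipped for that ack.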
+ */ +static bool bbr_run_fast_path(struct sock *sk, bool *update_model, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 prev_min_rtt_us, prev_mode; + + if (bbr_param(sk, fast_path) && bbr->try_fast_path && + rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && + !bbr->loss_in_round && !bbr->ecn_in_round ) { + prev_mode = bbr->mode; + prev_min_rtt_us = bbr->min_rtt_us; + bbr_check_drain(sk, rs, ctx); + bbr_update_cycle_phase(sk, rs, ctx); + bbr_update_min_rtt(sk, rs); + + if (bbr->mode == prev_mode && + bbr->min_rtt_us == prev_min_rtt_us && + bbr->try_fast_path) { + return true; + } + + /* Skip model update, but control still needs to be updated */ + *update_model = false; + } + return false; +} + +__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct bbr_context ctx = { 0 }; + bool update_model = true; + u32 bw, round_delivered; + int ce_ratio = -1; + + round_delivered = bbr_update_round_start(sk, rs, &ctx); + if (bbr->round_start) { + bbr->rounds_since_probe = + min_t(s32, bbr->rounds_since_probe + 1, 0xFF); + ce_ratio = bbr_update_ecn_alpha(sk); + } + bbr_plb(sk, rs, ce_ratio); + + bbr->ecn_in_round |= (bbr->ecn_eligible && rs->is_ece); + bbr_calculate_bw_sample(sk, rs, &ctx); + bbr_update_latest_delivery_signals(sk, rs, &ctx); + + if (bbr_run_fast_path(sk, &update_model, rs, &ctx)) + goto out; + + if (update_model) + bbr_update_model(sk, rs, &ctx); + + bbr_update_gains(sk); + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, + tcp_snd_cwnd(tp), &ctx); + bbr_bound_cwnd_for_inflight_model(sk); + +out: + bbr_advance_latest_delivery_signals(sk, rs, &ctx); + bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; + bbr->loss_in_cycle |= rs->lost > 0; + bbr->ecn_in_cycle |= rs->delivered_ce > 0; +} + +__bpf_kfunc static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->initialized = 1; + + bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp)); + bbr->prior_cwnd = tp->prior_cwnd; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + bbr->next_rtt_delivered = tp->delivered; + bbr->prev_ca_state = TCP_CA_Open; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->probe_rtt_min_us = tcp_min_rtt(tp); + bbr->probe_rtt_min_stamp = tcp_jiffies32; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_jiffies32; + + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw_reached = 0; + bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr_reset_lt_bw_sampling(sk); - return tcp_snd_cwnd(tcp_sk(sk)); + bbr->cycle_mstamp = 0; + bbr->cycle_idx = 0; + + bbr_reset_startup_mode(sk); + + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + + bbr->ce_state = 0; + bbr->prior_rcv_nxt = tp->rcv_nxt; + bbr->try_fast_path = 0; + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); + + /* Start sampling ECN mark rate after first full flight is ACKed: */ + bbr->loss_round_delivered = tp->delivered + 1; + bbr->loss_round_start = 0; + bbr->undo_bw_lo = 0; + bbr->undo_inflight_lo = 0; + bbr->undo_inflight_hi = 0; + bbr->loss_events_in_round = 0; + bbr->startup_ecn_rounds = 
+ 0;
+ bbr_reset_congestion_signals(sk);
+ bbr->bw_lo = ~0U;
+ bbr->bw_hi[0] = 0;
+ bbr->bw_hi[1] = 0;
+ bbr->inflight_lo = ~0U;
+ bbr->inflight_hi = ~0U;
+ bbr_reset_full_bw(sk);
+ bbr->bw_probe_up_cnt = ~0U;
+ bbr->bw_probe_up_acks = 0;
+ bbr->bw_probe_up_rounds = 0;
+ bbr->probe_wait_us = 0;
+ bbr->stopped_risky_probe = 0;
+ bbr->ack_phase = BBR_ACKS_INIT;
+ bbr->rounds_since_probe = 0;
+ bbr->bw_probe_samples = 0;
+ bbr->prev_probe_too_high = 0;
+ bbr->ecn_eligible = 0;
+ bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init);
+ bbr->alpha_last_delivered = 0;
+ bbr->alpha_last_delivered_ce = 0;
+ bbr->plb.pause_until = 0;
+
+ tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
+
+ if (bbr_can_use_ecn(sk))
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
+}
+
+/* BBR marks the current round trip as a loss round. */
+static void bbr_note_loss(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ /* Capture "current" data over the full round trip of loss, to
+ * have a better chance of observing the full capacity of the path.
+ */
+ if (!bbr->loss_in_round) /* first loss in this round trip? */
+ bbr->loss_round_delivered = tp->delivered; /* set round trip */
+ bbr->loss_in_round = 1;
+ bbr->loss_in_cycle = 1;
+}
+
-/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
+/* Core TCP stack informs us that the given skb was just marked lost. */
+__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk,
+ const struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ struct rate_sample rs = {};
+
+ bbr_note_loss(sk);
+
+ if (!bbr->bw_probe_samples)
+ return; /* not an skb sent while probing for bandwidth */
+ if (unlikely(!scb->tx.delivered_mstamp))
+ return; /* skb was SACKed, reneged, marked lost; ignore it */
+ /* We are probing for bandwidth. Construct a rate sample that
+ * estimates what happened in the flight leading up to this lost skb,
+ * then see if the loss rate went too high, and if so at which packet.
+ */
+ rs.tx_in_flight = scb->tx.in_flight;
+ rs.lost = tp->lost - scb->tx.lost;
+ rs.is_app_limited = scb->tx.is_app_limited;
+ if (bbr_is_inflight_too_high(sk, &rs)) {
+ rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb);
+ bbr_handle_inflight_too_high(sk, &rs);
+ }
+}
+
+static void bbr_run_loss_probe_recovery(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ struct rate_sample rs = {0};
+
+ bbr_note_loss(sk);
+
+ if (!bbr->bw_probe_samples)
+ return; /* not sent while probing for bandwidth */
+ /* We are probing for bandwidth. Construct a rate sample that
+ * estimates what happened in the flight leading up to this
+ * loss, then see if the loss rate went too high.
+ */
+ rs.lost = 1; /* TLP probe repaired loss of a single segment */
+ rs.tx_in_flight = bbr->inflight_latest + rs.lost;
+ rs.is_app_limited = tp->tlp_orig_data_app_limited;
+ if (bbr_is_inflight_too_high(sk, &rs))
+ bbr_handle_inflight_too_high(sk, &rs);
+}
+
+/* Revert short-term model if current loss recovery event was spurious. */
+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */
+ bbr->loss_in_round = 0;
+
+ /* Revert to cwnd and other state saved before loss episode.
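+ *
+ * Taking the max below restores each bound to the value saved at
+ * recovery entry unless the live value is already higher; a spurious
+ * recovery can therefore only loosen these bounds, never tighten them.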
+ */
+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo);
+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo);
+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi);
+ bbr->try_fast_path = 0; /* take slow path to set proper cwnd, pacing */
+ return bbr->prior_cwnd;
+}
+
+/* Entering loss recovery, so save state for when we undo recovery. */
__bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
{
+ struct bbr *bbr = inet_csk_ca(sk);
+
bbr_save_cwnd(sk);
+ /* For undo, save state that adapts based on loss signal. */
+ bbr->undo_bw_lo = bbr->bw_lo;
+ bbr->undo_inflight_lo = bbr->inflight_lo;
+ bbr->undo_inflight_hi = bbr->inflight_hi;
return tcp_sk(sk)->snd_ssthresh;
}
+
+static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr)
+{
+ switch (bbr->mode) {
+ case BBR_STARTUP:
+ return BBR_PHASE_STARTUP;
+ case BBR_DRAIN:
+ return BBR_PHASE_DRAIN;
+ case BBR_PROBE_BW:
+ break;
+ case BBR_PROBE_RTT:
+ return BBR_PHASE_PROBE_RTT;
+ default:
+ return BBR_PHASE_INVALID;
+ }
+ switch (bbr->cycle_idx) {
+ case BBR_BW_PROBE_UP:
+ return BBR_PHASE_PROBE_BW_UP;
+ case BBR_BW_PROBE_DOWN:
+ return BBR_PHASE_PROBE_BW_DOWN;
+ case BBR_BW_PROBE_CRUISE:
+ return BBR_PHASE_PROBE_BW_CRUISE;
+ case BBR_BW_PROBE_REFILL:
+ return BBR_PHASE_PROBE_BW_REFILL;
+ default:
+ return BBR_PHASE_INVALID;
+ }
+}
+
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
- union tcp_cc_info *info)
+ union tcp_cc_info *info)
{
if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 bw = bbr_bw(sk);
-
- bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
- memset(&info->bbr, 0, sizeof(info->bbr));
- info->bbr.bbr_bw_lo = (u32)bw;
- info->bbr.bbr_bw_hi = (u32)(bw >> 32);
- info->bbr.bbr_min_rtt = bbr->min_rtt_us;
- info->bbr.bbr_pacing_gain = bbr->pacing_gain;
- info->bbr.bbr_cwnd_gain = bbr->cwnd_gain;
+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk));
+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk));
+ u64 bw_lo = bbr->bw_lo == ~0U ?
+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); + struct tcp_bbr_info *bbr_info = &info->bbr; + + memset(bbr_info, 0, sizeof(*bbr_info)); + bbr_info->bbr_bw_lo = (u32)bw; + bbr_info->bbr_bw_hi = (u32)(bw >> 32); + bbr_info->bbr_min_rtt = bbr->min_rtt_us; + bbr_info->bbr_pacing_gain = bbr->pacing_gain; + bbr_info->bbr_cwnd_gain = bbr->cwnd_gain; + bbr_info->bbr_bw_hi_lsb = (u32)bw_hi; + bbr_info->bbr_bw_hi_msb = (u32)(bw_hi >> 32); + bbr_info->bbr_bw_lo_lsb = (u32)bw_lo; + bbr_info->bbr_bw_lo_msb = (u32)(bw_lo >> 32); + bbr_info->bbr_mode = bbr->mode; + bbr_info->bbr_phase = (__u8)bbr_get_phase(bbr); + bbr_info->bbr_version = (__u8)BBR_VERSION; + bbr_info->bbr_inflight_lo = bbr->inflight_lo; + bbr_info->bbr_inflight_hi = bbr->inflight_hi; + bbr_info->bbr_extra_acked = bbr_extra_acked(sk); *attr = INET_DIAG_BBRINFO; - return sizeof(info->bbr); + return sizeof(*bbr_info); } return 0; } __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) { + struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); if (new_state == TCP_CA_Loss) { - struct rate_sample rs = { .losses = 1 }; bbr->prev_ca_state = TCP_CA_Loss; - bbr->full_bw = 0; - bbr->round_start = 1; /* treat RTO like end of a round */ - bbr_lt_bw_sampling(sk, &rs); + tcp_plb_update_state_upon_rto(sk, &bbr->plb); + /* The tcp_write_timeout() call to sk_rethink_txhash() likely + * repathed this flow, so re-learn the min network RTT on the + * new path: + */ + bbr_reset_full_bw(sk); + if (!bbr_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { + /* bbr_adapt_lower_bounds() needs cwnd before + * we suffered an RTO, to update inflight_lo: + */ + bbr->inflight_lo = + max(tcp_snd_cwnd(tp), bbr->prior_cwnd); + } + } else if (bbr->prev_ca_state == TCP_CA_Loss && + new_state != TCP_CA_Loss) { + bbr_exit_loss_recovery(sk); } } + static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { - .flags = TCP_CONG_NON_RESTRICTED, + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, .name = "bbr", .owner = THIS_MODULE, .init = bbr_init, .cong_control = bbr_main, .sndbuf_expand = bbr_sndbuf_expand, + .skb_marked_lost = bbr_skb_marked_lost, .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .min_tso_segs = bbr_min_tso_segs, + .tso_segs = bbr_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; @@ -1159,10 +2362,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) +BTF_ID_FLAGS(func, bbr_skb_marked_lost) BTF_ID_FLAGS(func, bbr_undo_cwnd) BTF_ID_FLAGS(func, bbr_cwnd_event) BTF_ID_FLAGS(func, bbr_ssthresh) -BTF_ID_FLAGS(func, bbr_min_tso_segs) +BTF_ID_FLAGS(func, bbr_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) @@ -1195,5 +2399,12 @@ MODULE_AUTHOR("Van Jacobson "); MODULE_AUTHOR("Neal Cardwell "); MODULE_AUTHOR("Yuchung Cheng "); MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_AUTHOR("Priyaranjan Jha "); +MODULE_AUTHOR("Yousuk Seung "); +MODULE_AUTHOR("Kevin Yang "); +MODULE_AUTHOR("Arjun Roy "); +MODULE_AUTHOR("David Morley "); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); +MODULE_VERSION(__stringify(BBR_VERSION)); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index df758adbb445..e98e5dbc050e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); 
tcp_sk(sk)->prior_ssthresh = 0; + tcp_sk(sk)->fast_ack_mode = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 198f8a0d37be..a965bde56488 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -358,7 +358,7 @@ static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR) && @@ -376,7 +376,7 @@ static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); if (!tcp_ecn_mode_rfc3168(tp)) break; @@ -1305,7 +1305,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { + struct sock *sk = (struct sock *)tp; + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tp->lost += tcp_skb_pcount(skb); + if (ca_ops->skb_marked_lost) + ca_ops->skb_marked_lost(sk, skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) @@ -1670,6 +1675,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); + /* Adjust tx.in_flight as pcount is shifted from skb to prev. */ + if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, + "prev in_flight: %u skb in_flight: %u pcount: %u", + TCP_SKB_CB(prev)->tx.in_flight, + TCP_SKB_CB(skb)->tx.in_flight, + pcount)) + TCP_SKB_CB(skb)->tx.in_flight = 0; + else + TCP_SKB_CB(skb)->tx.in_flight -= pcount; + TCP_SKB_CB(prev)->tx.in_flight += pcount; + /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep @@ -3905,7 +3921,8 @@ static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) /* This routine deals with acks during a TLP episode and ends an episode by * resetting tlp_high_seq. Ref: TLP algorithm in RFC8985 */ -static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag, + struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); @@ -3922,6 +3939,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) /* ACK advances: there was a loss, so reduce cwnd. Reset * tlp_high_seq in tcp_init_cwnd_reduction() */ + tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY); tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); @@ -3932,6 +3950,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; + } else { + /* This ACK matches a TLP retransmit. We cannot yet tell if + * this ACK is for the original or the TLP retransmit. + */ + rs->is_acking_tlp_retrans_seq = 1; } } @@ -4058,6 +4081,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); + tcp_rate_check_app_limited(sk); /* ts_recent update must be made after we are sure that the packet * is in window. 
@@ -4130,7 +4154,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, flag); if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); + tcp_process_tlp_ack(sk, ack, flag, &rs); if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | @@ -4155,6 +4179,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); + rs.is_ece = !!(flag & FLAG_ECE); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); @@ -4180,7 +4205,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_ack_probe(sk); if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); + tcp_process_tlp_ack(sk, ack, flag, &rs); return 1; old_ack: @@ -5909,13 +5934,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && + (tp->fast_ack_mode == 1 || /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ - (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || - __tcp_select_window(sk) >= tp->rcv_wnd)) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd))) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bd5462154f97..0be0e7a79f1e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -497,6 +497,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; + tcp_set_ecn_low_from_dst(sk, dst); + if (ca_key != TCP_CA_UNSPEC) { const struct tcp_congestion_ops *ca; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 479afb714bdf..a9eb14d0cf47 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -348,7 +348,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } - } else if (!tcp_ca_needs_ecn(sk)) { + } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && + !tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } @@ -1762,7 +1763,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int old_factor; + int old_factor, inflight_prev; long limit; u16 flags; int nlen; @@ -1837,6 +1838,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (diff) tcp_adjust_pcount(sk, skb, diff); + + inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; + if (inflight_prev < 0) { + WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( + old_factor, + TCP_SKB_CB(skb)->sacked, + TCP_SKB_CB(skb)->tx.in_flight), + "inconsistent: tx.in_flight: %u " + "old_factor: %d mss: %u sacked: %u " + "1st pcount: %d 2nd pcount: %d " + "1st len: %u 2nd len: %u ", + TCP_SKB_CB(skb)->tx.in_flight, old_factor, + mss_now, TCP_SKB_CB(skb)->sacked, + tcp_skb_pcount(skb), tcp_skb_pcount(buff), + skb->len, buff->len); + inflight_prev = 0; + } + /* Set 1st tx.in_flight as if 1st were sent by 
itself: */ + TCP_SKB_CB(skb)->tx.in_flight = inflight_prev + + tcp_skb_pcount(skb); + /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */ + TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + + tcp_skb_pcount(skb) + + tcp_skb_pcount(buff); } /* Link BUFF into the send queue. */ @@ -2193,13 +2218,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; - - min_tso = ca_ops->min_tso_segs ? - ca_ops->min_tso_segs(sk) : - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + u32 tso_segs; - tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + tso_segs = ca_ops->tso_segs ? + ca_ops->tso_segs(sk, mss_now) : + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -2940,6 +2964,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); + tcp_set_tx_in_flight(sk, skb); goto repair; /* Skip network transmission */ } @@ -3152,6 +3177,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; + tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited; if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index a8f6d9d06f2e..8737f2134648 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -34,6 +34,24 @@ * ready to send in the write queue. */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 in_flight; + + /* Check, sanitize, and record packets in flight after skb was sent. */ + in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); + if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, + "insane in_flight %u cc %s mss %u " + "cwnd %u pif %u %u %u %u\n", + in_flight, inet_csk(sk)->icsk_ca_ops->name, + tp->mss_cache, tp->snd_cwnd, + tp->packets_out, tp->retrans_out, + tp->sacked_out, tp->lost_out)) + in_flight = TCPCB_IN_FLIGHT_MAX; + TCP_SKB_CB(skb)->tx.in_flight = in_flight; +} + /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; + TCP_SKB_CB(skb)->tx.lost = tp->lost; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0; + tcp_set_tx_in_flight(sk, skb); } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, scb->end_seq, rs->last_end_seq)) { + rs->prior_lost = scb->tx.lost; rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->tx_in_flight = scb->tx.in_flight; rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ - rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, - scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp32_us_delta( + tp->first_tx_mstamp, + scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being @@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, return; } rs->delivered = tp->delivered - rs->prior_delivered; + rs->lost = tp->lost - rs->prior_lost; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ @@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 160080c9021d..06ee74f2c01d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -702,6 +702,7 @@ void tcp_write_timer_handler(struct sock *sk) tcp_timeout_expires(sk)); return; } + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; -- 2.52.0 From 22a9d3aae1abe0a52fe7016d2a1456b980034d19 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:30 +0800 Subject: [PATCH 4/9] cachy Signed-off-by: Eric Naim --- .gitignore | 2 + .../admin-guide/kernel-parameters.txt | 12 + Documentation/admin-guide/sysctl/vm.rst | 72 + Makefile | 33 +- arch/Kconfig | 19 + arch/x86/Kconfig.cpu | 46 + arch/x86/Makefile | 16 +- arch/x86/include/asm/pci.h | 6 + arch/x86/kernel/cpu/bus_lock.c | 4 + arch/x86/pci/common.c | 7 +- block/Kconfig.iosched | 14 + block/Makefile | 8 + block/adios.c | 2002 ++++++++++ block/elevator.c | 24 +- drivers/Makefile | 13 +- drivers/ata/ahci.c | 23 +- drivers/cpufreq/Kconfig.x86 | 2 - drivers/cpufreq/intel_pstate.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + drivers/gpu/drm/amd/display/Kconfig | 6 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 15 +- drivers/input/evdev.c | 19 +- drivers/media/v4l2-core/Kconfig | 5 + drivers/media/v4l2-core/Makefile | 2 + drivers/media/v4l2-core/v4l2loopback.c | 3331 +++++++++++++++++ drivers/media/v4l2-core/v4l2loopback.h | 108 + .../media/v4l2-core/v4l2loopback_formats.h | 445 +++ drivers/pci/controller/Makefile | 6 + drivers/pci/controller/intel-nvme-remap.c | 462 +++ drivers/pci/quirks.c | 101 + 
drivers/scsi/Kconfig | 2 + drivers/scsi/Makefile | 1 + drivers/scsi/vhba/Kconfig | 9 + drivers/scsi/vhba/Makefile | 4 + drivers/scsi/vhba/vhba.c | 1132 ++++++ include/linux/mm.h | 8 + include/linux/pagemap.h | 2 +- init/Kconfig | 26 + kernel/Kconfig.hz | 24 + kernel/Kconfig.preempt | 2 +- kernel/fork.c | 28 + kernel/locking/rwsem.c | 4 +- kernel/sched/fair.c | 79 +- kernel/sched/sched.h | 2 +- mm/Kconfig | 65 +- mm/compaction.c | 4 + mm/huge_memory.c | 4 + mm/mm_init.c | 1 + mm/page-writeback.c | 8 + mm/page_alloc.c | 4 + mm/swap.c | 5 + mm/util.c | 34 + mm/vmscan.c | 157 +- scripts/Makefile.lib | 7 + scripts/Makefile.thinlto | 40 + scripts/Makefile.vmlinux_a | 83 + scripts/mod/modpost.c | 15 +- scripts/package/PKGBUILD | 5 + 64 files changed, 8491 insertions(+), 99 deletions(-) create mode 100644 block/adios.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h create mode 100644 drivers/pci/controller/intel-nvme-remap.c create mode 100644 drivers/scsi/vhba/Kconfig create mode 100644 drivers/scsi/vhba/Makefile create mode 100644 drivers/scsi/vhba/vhba.c create mode 100644 scripts/Makefile.thinlto create mode 100644 scripts/Makefile.vmlinux_a diff --git a/.gitignore b/.gitignore index 3a7241c941f5..c12bdf5a97f9 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ *.zst Module.symvers dtbs-list +builtin.order modules.order # @@ -67,6 +68,7 @@ modules.order /vmlinux.32 /vmlinux.map /vmlinux.symvers +/vmlinux.thinlto-index /vmlinux.unstripped /vmlinux-gdb.py /vmlinuz diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1058f2a6d6a8..e70e737f6daa 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2520,6 +2520,9 @@ Kernel parameters disable Do not enable intel_pstate as the default scaling driver for the supported processors + enable + Enable intel_pstate in-case "disable" was passed + previously in the kernel boot parameters active Use intel_pstate driver to bypass the scaling governors layer of cpufreq and provides it own @@ -5005,6 +5008,15 @@ Kernel parameters nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + pcie_acs_override = + [PCIE] Override missing PCIe ACS support for: + downstream + All downstream ports - full ACS capabilities + multfunction + All multifunction devices - multifunction ACS subset + id:nnnn:nnnn + Specfic device - full ACS capabilities + Specified as vid:did (vendor/device ID) in hex noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 245bf6394935..030481823495 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,6 +25,9 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes +- anon_min_ratio +- clean_low_ratio +- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -110,6 +113,67 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory. +anon_min_ratio +============== + +This knob provides *hard* protection of anonymous pages. 
The anonymous pages +on the current node won't be reclaimed under any conditions when their amount +is below vm.anon_min_ratio. + +This knob may be used to prevent excessive swap thrashing when anonymous +memory is low (for example, when memory is going to be overfilled by +compressed data of zram module). + +Setting this value too high (close to 100) can result in inability to +swap and can lead to early OOM under memory pressure. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 1. + + +clean_low_ratio +================ + +This knob provides *best-effort* protection of clean file pages. The file pages +on the current node won't be reclaimed under memory pressure when the amount of +clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. + +Protection of clean file pages using this knob may be used when swapping is +still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory pressure. + +Setting it to a high value may result in a early eviction of anonymous pages +into the swap space by attempting to hold the protected amount of clean file +pages in memory. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + +clean_min_ratio +================ + +This knob provides *hard* protection of clean file pages. The file pages on the +current node won't be reclaimed under memory pressure when the amount of clean +file pages is below vm.clean_min_ratio. + +Hard protection of clean file pages using this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free swap space; + - improve performance in disk cache-bound tasks under memory pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + +Setting it to a high value may result in a early out-of-memory condition due to +the inability to reclaim the protected amount of clean file pages when other +types of pages cannot be reclaimed. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 4. + + compact_memory ============== @@ -984,6 +1048,14 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. +This knob has no effect if the amount of clean file pages on the current +node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, +only anonymous pages can be reclaimed. + +If the number of anonymous pages on the current node is below +vm.anon_min_ratio, then only file pages can be reclaimed with +any vm.swappiness value. + unprivileged_userfaultfd ======================== diff --git a/Makefile b/Makefile index c8a588fef62c..8807e0122a16 100644 --- a/Makefile +++ b/Makefile @@ -888,11 +888,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 KBUILD_RUSTFLAGS += -Copt-level=2 +else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 +KBUILD_CFLAGS += -O3 +KBUILD_RUSTFLAGS += -Copt-level=3 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os KBUILD_RUSTFLAGS += -Copt-level=s endif +# Perform swing modulo scheduling immediately before the first scheduling pass. +# This pass looks at innermost loops and reorders their instructions by +# overlapping different iterations. 
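+# These are GCC-only flags; the cc-option test below probes the compiler
+# and silently drops them when it does not accept them (e.g. Clang).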
+KBUILD_CFLAGS += $(call cc-option,-fmodulo-sched -fmodulo-sched-allow-regmoves -fivopts) + # Always set `debug-assertions` and `overflow-checks` because their default # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) @@ -1022,10 +1030,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_THIN -CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit -else +ifdef CONFIG_LTO_CLANG_FULL CC_FLAGS_LTO := -flto +else +CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1232,7 +1240,7 @@ export ARCH_DRIVERS := $(drivers-y) $(drivers-m) KBUILD_VMLINUX_OBJS := built-in.a $(patsubst %/, %/lib.a, $(filter %/, $(libs-y))) KBUILD_VMLINUX_LIBS := $(filter-out %/, $(libs-y)) -export KBUILD_VMLINUX_LIBS +export KBUILD_VMLINUX_OBJS KBUILD_VMLINUX_LIBS export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds ifdef CONFIG_TRIM_UNUSED_KSYMS @@ -1241,16 +1249,12 @@ ifdef CONFIG_TRIM_UNUSED_KSYMS KBUILD_MODULES := y endif -# '$(AR) mPi' needs 'T' to workaround the bug of llvm-ar <= 14 -quiet_cmd_ar_vmlinux.a = AR $@ - cmd_ar_vmlinux.a = \ - rm -f $@; \ - $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ - $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) +PHONY += vmlinux_a +vmlinux_a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_a -targets += vmlinux.a -vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE - $(call if_changed,ar_vmlinux.a) +vmlinux.a: vmlinux_a + @: PHONY += vmlinux_o vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) @@ -1611,6 +1615,7 @@ endif # CONFIG_MODULES CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ + vmlinux.thinlto-index builtin.order \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ .builtin-dtbs-list .builtin-dtb.S @@ -2063,7 +2068,7 @@ clean: $(clean-dirs) $(call cmd,rmfiles) @find . $(RCS_FIND_IGNORE) \ \( -name '*.[aios]' -o -name '*.rsi' -o -name '*.ko' -o -name '.*.cmd' \ - -o -name '*.ko.*' \ + -o -name '*.ko.*' -o -name '*.o.thinlto.bc' \ -o -name '*.dtb' -o -name '*.dtbo' \ -o -name '*.dtb.S' -o -name '*.dtbo.S' \ -o -name '*.dt.yaml' -o -name 'dtbs-list' \ diff --git a/arch/Kconfig b/arch/Kconfig index 1401fd81cfe0..753e48b25637 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -861,6 +861,25 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. + +config LTO_CLANG_THIN_DIST + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO in distributed build mode. + In this mode, the linker performs the thin-link, generating + ThinLTO index files. Subsequently, the build system explicitly + invokes ThinLTO backend compilation using these index files + and pre-linked IR objects. The resulting native object files + are with the .thinlto-native.o suffix. + + This build mode offers improved visibility into the ThinLTO + process through explicit subcommand exposure. It also makes + final native object files directly available, benefiting + tools like objtool and kpatch. Additionally, it provides + crucial granular control over back-end options, enabling + module-specific compiler options, and simplifies debugging. 
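+
+ As a rough sketch (an illustration, not the exact Kbuild rule), the
+ explicit backend compile for one object in this mode has the form:
+ clang -x ir foo.o -fthinlto-index=foo.o.thinlto.bc -c -o foo.thinlto-native.o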
endchoice

config ARCH_SUPPORTS_AUTOFDO_CLANG
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f928cf6e3252..d4ce964d9713 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -255,6 +255,11 @@ config CC_HAS_MARCH_NATIVE
# usage warnings that only appear wth '-march=native'.
depends on CC_IS_GCC || CLANG_VERSION >= 190100

+
+choice
+ prompt "x86_64 Compiler Build Optimization"
+ default GENERIC_CPU
+
config X86_NATIVE_CPU
bool "Build and optimize for local/native CPU"
depends on X86_64
@@ -269,6 +274,47 @@ config X86_NATIVE_CPU

If unsure, say N.

+config GENERIC_CPU
+ bool "Generic-x86-64"
+ depends on X86_64
+ help
+ Generic x86-64 CPU.
+ Runs equally well on all x86-64 CPUs.
+
+config MZEN4
+ bool "AMD Zen 4"
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000)
+ help
+ Select this for AMD Family 19h Zen 4 processors.
+
+ Enables -march=znver4
+
+endchoice
+
+config X86_64_VERSION
+ int "x86-64 compiler ISA level"
+ range 1 4
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64 && GENERIC_CPU
+ help
+ Specify a specific x86-64 compiler ISA level.
+
+ There are three x86-64 ISA levels that work on top of
+ the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4.
+
+ x86-64-v2 brings support for vector instructions up to Streaming SIMD
+ Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3
+ (SSSE3), the POPCNT instruction, and CMPXCHG16B.
+
+ x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional
+ bit-manipulation instructions.
+
+ x86-64-v4 is not included since the kernel does not use AVX512
+ instructions.
+
+ You can find the best version for your CPU by running one of the following:
+ /lib/ld-linux-x86-64.so.2 --help | grep supported
+ /lib64/ld-linux-x86-64.so.2 --help | grep supported
+
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1d403a3612ea..d3a1ae544f2a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -167,10 +167,22 @@ else ifdef CONFIG_X86_NATIVE_CPU
KBUILD_CFLAGS += -march=native
KBUILD_RUSTFLAGS += -Ctarget-cpu=native
-else
+endif
+
+ifdef CONFIG_MZEN4
+ KBUILD_CFLAGS += -march=znver4
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4
+endif
+
+ifdef CONFIG_GENERIC_CPU
+ifeq ($(CONFIG_X86_64_VERSION),1)
KBUILD_CFLAGS += -march=x86-64 -mtune=generic
KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic
-endif
+else
+ KBUILD_CFLAGS += -march=x86-64-v$(CONFIG_X86_64_VERSION)
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
+endif # CONFIG_X86_64_VERSION
+endif # CONFIG_GENERIC_CPU

KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b3ab80a03365..5e883b397ff3 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -26,6 +26,7 @@ struct pci_sysdata {
#if IS_ENABLED(CONFIG_VMD)
struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */
#endif
+ struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */
};

extern int pci_routeirq;
@@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus)
#define is_vmd(bus) false
#endif /* CONFIG_VMD */

+static inline bool is_nvme_remap(struct pci_bus *bus)
+{
+ return to_pci_sysdata(bus)->nvme_remap_dev != NULL;
+}
+
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes or
architectures with
incomplete PCI setup by the loader */ diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index dbc99a47be45..e92b3f7c8cdd 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -47,7 +47,11 @@ static const struct { static struct ratelimit_state bld_ratelimit; +#ifdef CONFIG_CACHY +static unsigned int sysctl_sld_mitigate = 0; +#else static unsigned int sysctl_sld_mitigate = 1; +#endif /* CONFIG_CACHY */ static DEFINE_SEMAPHORE(buslock_sem, 1); #ifdef CONFIG_PROC_SYSCTL diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index ddb798603201..7c20387d8202 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) return 0; } -#if IS_ENABLED(CONFIG_VMD) struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) { +#if IS_ENABLED(CONFIG_VMD) if (is_vmd(dev->bus)) return to_pci_sysdata(dev->bus)->vmd_dev; +#endif + + if (is_nvme_remap(dev->bus)) + return to_pci_sysdata(dev->bus)->nvme_remap_dev; return dev; } -#endif diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 27f11320b8d1..e98585dd83e0 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -16,6 +16,20 @@ config MQ_IOSCHED_KYBER synchronous writes, it will self-tune queue depths to achieve that goal. +config MQ_IOSCHED_ADIOS + tristate "Adaptive Deadline I/O scheduler" + default m + help + The Adaptive Deadline I/O Scheduler (ADIOS) is a multi-queue I/O + scheduler with learning-based adaptive latency control. + +config MQ_IOSCHED_DEFAULT_ADIOS + bool "Enable ADIOS I/O scheduler as default MQ I/O scheduler" + depends on MQ_IOSCHED_ADIOS=y + default n + help + Enable the ADIOS I/O scheduler as the default scheduler for MQ I/O. + config IOSCHED_BFQ tristate "BFQ I/O scheduler" select BLK_ICQ diff --git a/block/Makefile b/block/Makefile index c65f4da93702..105b12fd86b8 100644 --- a/block/Makefile +++ b/block/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o +obj-$(CONFIG_MQ_IOSCHED_ADIOS) += adios.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o @@ -36,3 +37,10 @@ obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \ blk-crypto-sysfs.o obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o + +all: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules + +clean: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean + diff --git a/block/adios.c b/block/adios.c new file mode 100644 index 000000000000..c6eaa85911d3 --- /dev/null +++ b/block/adios.c @@ -0,0 +1,2002 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Adaptive Deadline I/O Scheduler (ADIOS) + * Copyright (C) 2025 Masahito Suzuki + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elevator.h" +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-sched.h" + +#define ADIOS_VERSION "3.1.8" + +/* Request Types: + * + * Tier 0 (Highest Priority): Emergency & System Integrity Requests + * ----------------------------------------------------------------- + * - Target: Requests with the BLK_MQ_INSERT_AT_HEAD flag. 
+ * - Purpose: For critical, non-negotiable operations such as device error + * recovery or flush sequences that must bypass all other scheduling logic. + * - Implementation: Placed in a dedicated, high-priority FIFO queue + * (`prio_queue[0]`) for immediate dispatch. + * + * Tier 1 (High Priority): I/O Barrier Guarantees + * --------------------------------------------------------------- + * - Target: Requests with the REQ_OP_FLUSH flag. + * - Purpose: To enforce a strict I/O barrier. When a flush request is + * received, the scheduler stops processing new requests from its main + * queues until all preceding requests have been completed. This guarantees + * the order of operations required by filesystems for data integrity. + * - Implementation: A state flag (ADIOS_STATE_BARRIER) halts + * insertion into the main deadline tree. The barrier request and all + * subsequent requests are held in a temporary `barrier_queue`. Once the + * main queues are drained, the barrier request and the subsequent requests + * are released from the pending queue back into the scheduler. + * + * Tier 2 (Medium Priority): Application Responsiveness + * ---------------------------------------------------- + * - Target: Normal synchronous requests (e.g., from standard file reads). + * - Purpose: To ensure correct application behavior for operations that + * depend on sequential I/O completion (e.g., file system mounts) and to + * provide low latency for interactive applications. + * - Implementation: The deadline for these requests is set to their start + * time (`rq->start_time_ns`). This effectively enforces FIFO-like behavior + * within the deadline-sorted red-black tree, preventing out-of-order + * execution of dependent synchronous operations. + * + * Tier 3 (Normal Priority): Background Throughput + * ----------------------------------------------- + * - Target: Asynchronous requests. + * - Purpose: To maximize disk throughput for background tasks where latency + * is not critical. + * - Implementation: These are the only requests where ADIOS's adaptive + * latency prediction model is used. A dynamic deadline is calculated based + * on the predicted I/O latency, allowing for aggressive reordering to + * optimize I/O efficiency. + * + * Dispatch Logic: + * The scheduler always dispatches requests in strict priority order: + * 1. prio_queue[0] (Tier 0) + * 2. The deadline-sorted batch queue (which naturally prioritizes Tier 2 + * over Tier 3 due to their calculated deadlines). + * 3. Barrier-pending requests are handled only after the main queues are empty. 
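+ *
+ * Worked example (illustrative, restating the rules above): if a flush
+ * arrives while Tier 2/3 requests are queued, ADIOS_STATE_BARRIER is
+ * set, the flush and every later arrival wait in barrier_queue, the
+ * deadline tree drains, and only then is barrier_queue released back
+ * into the scheduler.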
+ */ + +// Global variable to control the latency +static u64 default_global_latency_window = 16000000ULL; +static u64 default_global_latency_window_rotational = 22000000ULL; +// Ratio below which batch queues should be refilled +static u8 default_bq_refill_below_ratio = 20; +// Maximum latency sample to input +static u64 default_lat_model_latency_limit = 500000000ULL; +// Batch ordering strategy +static u64 default_batch_order = 0; + +/* Compliance Flags: + * 0x1: Async requests will not be reordered based on the predicted latency + */ +enum adios_compliance_flags { + ADIOS_CF_FIXORDER = 1U << 0, +}; + +// Flags to control compliance with block layer constraints +static u64 default_compliance_flags = 0x0; + +// Dynamic thresholds for shrinkage +static u32 default_lm_shrink_at_kreqs = 5000; +static u32 default_lm_shrink_at_gbytes = 50; +static u32 default_lm_shrink_resist = 2; + +enum adios_optype { + ADIOS_READ = 0, + ADIOS_WRITE = 1, + ADIOS_DISCARD = 2, + ADIOS_OTHER = 3, + ADIOS_OPTYPES = 4, +}; + +// Latency targets for each operation type +static u64 default_latency_target[ADIOS_OPTYPES] = { + [ADIOS_READ] = 2ULL * NSEC_PER_MSEC, + [ADIOS_WRITE] = 2000ULL * NSEC_PER_MSEC, + [ADIOS_DISCARD] = 8000ULL * NSEC_PER_MSEC, + [ADIOS_OTHER] = 0ULL * NSEC_PER_MSEC, +}; + +// Maximum batch size limits for each operation type +static u32 default_batch_limit[ADIOS_OPTYPES] = { + [ADIOS_READ] = 36, + [ADIOS_WRITE] = 72, + [ADIOS_DISCARD] = 1, + [ADIOS_OTHER] = 1, +}; + +enum adios_batch_order { + ADIOS_BO_OPTYPE = 0, + ADIOS_BO_ELEVATOR = 1, +}; + +// Thresholds for latency model control +#define LM_BLOCK_SIZE_THRESHOLD 4096 +#define LM_SAMPLES_THRESHOLD 1024 +#define LM_INTERVAL_THRESHOLD 1500 +#define LM_OUTLIER_PERCENTILE 99 +#define LM_LAT_BUCKET_COUNT 64 + +#define ADIOS_PQ_LEVELS 2 +#define ADIOS_DL_TYPES 2 +#define ADIOS_BQ_PAGES 2 + +static u32 default_dl_prio[ADIOS_DL_TYPES] = {8, 0}; + +// Bit flags for the atomic state variable, indicating which queues have requests. +enum adios_state_flags { + ADIOS_STATE_PQ_0 = 1U << 0, + ADIOS_STATE_PQ_1 = 1U << 1, + ADIOS_STATE_DL_0 = 1U << 2, + ADIOS_STATE_DL_1 = 1U << 3, + ADIOS_STATE_BQ_PAGE_0 = 1U << 4, + ADIOS_STATE_BQ_PAGE_1 = 1U << 5, + ADIOS_STATE_BARRIER = 1U << 6, +}; +#define ADIOS_STATE_PQ 0 +#define ADIOS_STATE_DL 2 +#define ADIOS_STATE_BQ 4 +#define ADIOS_STATE_BP 6 + +// Temporal granularity of the deadline tree node (dl_group) +#define ADIOS_QUANTUM_SHIFT 20 + +#define ADIOS_MAX_INSERTS_PER_LOCK 72 +#define ADIOS_MAX_DELETES_PER_LOCK 24 + +// Structure to hold latency bucket data for small requests +struct latency_bucket_small { + u64 weighted_sum_latency; + u64 sum_of_weights; +}; + +// Structure to hold latency bucket data for large requests +struct latency_bucket_large { + u64 weighted_sum_latency; + u64 weighted_sum_block_size; + u64 sum_of_weights; +}; + +// Structure to hold per-cpu buckets, improving data locality and code clarity. 
+struct lm_buckets { + struct latency_bucket_small small_bucket[LM_LAT_BUCKET_COUNT]; + struct latency_bucket_large large_bucket[LM_LAT_BUCKET_COUNT]; +}; + +// Structure to hold RCU-protected latency model parameters +struct latency_model_params { + u64 base; + u64 slope; + u64 small_sum_delay; + u64 small_count; + u64 large_sum_delay; + u64 large_sum_bsize; + u64 last_update_jiffies; + struct rcu_head rcu; +}; + +// Structure to hold the latency model context data +struct latency_model { + spinlock_t update_lock; + struct latency_model_params __rcu *params; + + // Per-CPU buckets to avoid lock contention on the completion path + struct lm_buckets __percpu *pcpu_buckets; + + u32 lm_shrink_at_kreqs; + u32 lm_shrink_at_gbytes; + u8 lm_shrink_resist; +}; + +union adios_in_flight_rqs { + atomic64_t atomic; + u64 scalar; + struct { + u64 count: 16; + u64 total_pred_lat: 48; + }; +}; + +// Adios scheduler data +struct adios_data { + spinlock_t pq_lock; + struct list_head prio_queue[2]; + + struct rb_root_cached dl_tree[2]; + spinlock_t lock; + s64 dl_bias; + s32 dl_prio[2]; + + atomic_t state; + u8 bq_state[ADIOS_BQ_PAGES]; + + void (*insert_request_fn)(struct blk_mq_hw_ctx *, struct request *, + blk_insert_t, struct list_head *); + + u64 global_latency_window; + u64 compliance_flags; + u64 latency_target[ADIOS_OPTYPES]; + u32 batch_limit[ADIOS_OPTYPES]; + u32 batch_actual_max_size[ADIOS_OPTYPES]; + u32 batch_actual_max_total; + u32 async_depth; + u32 lat_model_latency_limit; + u8 bq_refill_below_ratio; + u8 is_rotational; + u8 batch_order; + u8 elv_direction; + sector_t head_pos; + sector_t last_completed_pos; + + bool bq_page; + struct list_head batch_queue[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; + u32 batch_count[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; + u8 bq_batch_order[ADIOS_BQ_PAGES]; + spinlock_t bq_lock; + spinlock_t barrier_lock; + struct list_head barrier_queue; + + struct lm_buckets *aggr_buckets; + + struct latency_model latency_model[ADIOS_OPTYPES]; + struct timer_list update_timer; + + union adios_in_flight_rqs in_flight_rqs; + atomic64_t total_pred_lat; + u64 last_completed_time; + + struct kmem_cache *rq_data_pool; + struct kmem_cache *dl_group_pool; + + struct request_queue *queue; +}; + +// List of requests with the same deadline in the deadline-sorted tree +struct dl_group { + struct rb_node node; + struct list_head rqs; + u64 deadline; +} __attribute__((aligned(64))); + +// Structure to hold scheduler-specific data for each request +struct adios_rq_data { + struct list_head *dl_group; + struct list_head dl_node; + + struct request *rq; + u64 deadline; + u64 pred_lat; + u32 block_size; + bool managed; +} __attribute__((aligned(64))); + +static const int adios_prio_to_wmult[40] = { + /* -20 */ 88761, 71755, 56483, 46273, 36291, + /* -15 */ 29154, 23254, 18705, 14949, 11916, + /* -10 */ 9548, 7620, 6100, 4904, 3906, + /* -5 */ 3121, 2501, 1991, 1586, 1277, + /* 0 */ 1024, 820, 655, 526, 423, + /* 5 */ 335, 272, 215, 172, 137, + /* 10 */ 110, 87, 70, 56, 45, + /* 15 */ 36, 29, 23, 18, 15, +}; + +static inline bool compliant(struct adios_data *ad, u32 flag) { + return ad->compliance_flags & flag; +} + +// Count the number of entries in aggregated small buckets +static u64 lm_count_small_entries(struct latency_bucket_small *buckets) { + u64 total_weight = 0; + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) + total_weight += buckets[i].sum_of_weights; + return total_weight; +} + +// Update the small buckets in the latency model from aggregated data +static bool lm_update_small_buckets(struct 
latency_model *model,
+		struct latency_model_params *params,
+		struct latency_bucket_small *buckets,
+		u64 total_weight, bool count_all) {
+	u64 sum_latency = 0;
+	u64 sum_weight = 0;
+	u64 cumulative_weight = 0, threshold_weight = 0;
+	u8 outlier_threshold_bucket = 0;
+	u8 outlier_percentile = LM_OUTLIER_PERCENTILE;
+	u8 reduction;
+
+	if (count_all)
+		outlier_percentile = 100;
+
+	// Calculate the threshold weight for outlier detection
+	threshold_weight = (total_weight * outlier_percentile) / 100;
+
+	// Identify the bucket that corresponds to the outlier threshold
+	for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) {
+		cumulative_weight += buckets[i].sum_of_weights;
+		if (cumulative_weight >= threshold_weight) {
+			outlier_threshold_bucket = i;
+			break;
+		}
+	}
+
+	// Calculate the average latency, excluding outliers
+	for (u8 i = 0; i <= outlier_threshold_bucket; i++) {
+		struct latency_bucket_small *bucket = &buckets[i];
+		if (i < outlier_threshold_bucket) {
+			sum_latency += bucket->weighted_sum_latency;
+			sum_weight += bucket->sum_of_weights;
+		} else {
+			// The threshold bucket's contribution is proportional
+			u64 remaining_weight =
+				threshold_weight - (cumulative_weight - bucket->sum_of_weights);
+			if (bucket->sum_of_weights > 0) {
+				sum_latency += div_u64(bucket->weighted_sum_latency *
+					remaining_weight, bucket->sum_of_weights);
+				sum_weight += remaining_weight;
+			}
+		}
+	}
+
+	// Shrink the model if it reaches the readjustment threshold
+	if (params->small_count >= 1000ULL * model->lm_shrink_at_kreqs) {
+		reduction = model->lm_shrink_resist;
+		if (params->small_count >> reduction) {
+			params->small_sum_delay -= params->small_sum_delay >> reduction;
+			params->small_count -= params->small_count >> reduction;
+		}
+	}
+
+	if (!sum_weight)
+		return false;
+
+	// Accumulate the average latency into the statistics
+	params->small_sum_delay += sum_latency;
+	params->small_count += sum_weight;
+
+	return true;
+}
+
+// Count the number of entries in aggregated large buckets
+static u64 lm_count_large_entries(struct latency_bucket_large *buckets) {
+	u64 total_weight = 0;
+	for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++)
+		total_weight += buckets[i].sum_of_weights;
+	return total_weight;
+}
+
+// Update the large buckets in the latency model from aggregated data
+static bool lm_update_large_buckets(struct latency_model *model,
+		struct latency_model_params *params,
+		struct latency_bucket_large *buckets,
+		u64 total_weight, bool count_all) {
+	s64 sum_latency = 0;
+	u64 sum_block_size = 0, intercept;
+	u64 cumulative_weight = 0, threshold_weight = 0;
+	u64 sum_weight = 0;
+	u8 outlier_threshold_bucket = 0;
+	u8 outlier_percentile = LM_OUTLIER_PERCENTILE;
+	u8 reduction;
+
+	if (count_all)
+		outlier_percentile = 100;
+
+	// Calculate the threshold weight for outlier detection
+	threshold_weight = (total_weight * outlier_percentile) / 100;
+
+	// Identify the bucket that corresponds to the outlier threshold
+	for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) {
+		cumulative_weight += buckets[i].sum_of_weights;
+		if (cumulative_weight >= threshold_weight) {
+			outlier_threshold_bucket = i;
+			break;
+		}
+	}
+
+	// Calculate the average latency and block size, excluding outliers
+	for (u8 i = 0; i <= outlier_threshold_bucket; i++) {
+		struct latency_bucket_large *bucket = &buckets[i];
+		if (i < outlier_threshold_bucket) {
+			sum_latency += bucket->weighted_sum_latency;
+			sum_block_size += bucket->weighted_sum_block_size;
+			sum_weight += bucket->sum_of_weights;
+		} else {
+			// The threshold bucket's contribution is proportional
+			u64 remaining_weight =
+				threshold_weight - (cumulative_weight - bucket->sum_of_weights);
+			if (bucket->sum_of_weights > 0) {
+				sum_latency += div_u64(bucket->weighted_sum_latency *
+					remaining_weight, bucket->sum_of_weights);
+				sum_block_size += div_u64(bucket->weighted_sum_block_size *
+					remaining_weight, bucket->sum_of_weights);
+				sum_weight += remaining_weight;
+			}
+		}
+	}
+
+	if (!sum_weight)
+		return false;
+
+	// Shrink the model if it reaches the readjustment threshold
+	if (params->large_sum_bsize >= 0x40000000ULL * model->lm_shrink_at_gbytes) {
+		reduction = model->lm_shrink_resist;
+		if (params->large_sum_bsize >> reduction) {
+			params->large_sum_delay -= params->large_sum_delay >> reduction;
+			params->large_sum_bsize -= params->large_sum_bsize >> reduction;
+		}
+	}
+
+	// Accumulate the average delay into the statistics
+	intercept = params->base;
+	if (sum_latency > intercept)
+		sum_latency -= intercept;
+
+	params->large_sum_delay += sum_latency;
+	params->large_sum_bsize += sum_block_size;
+
+	return true;
+}
+
+static void reset_buckets(struct lm_buckets *buckets)
+{ memset(buckets, 0, sizeof(*buckets)); }
+
+static void lm_reset_pcpu_buckets(struct latency_model *model) {
+	int cpu;
+	for_each_possible_cpu(cpu)
+		reset_buckets(per_cpu_ptr(model->pcpu_buckets, cpu));
+}
+
+// Update the latency model parameters and statistics
+static void latency_model_update(
+	struct adios_data *ad, struct latency_model *model) {
+	u64 now;
+	u64 small_weight, large_weight;
+	bool time_elapsed;
+	bool small_processed = false, large_processed = false;
+	struct lm_buckets *aggr = ad->aggr_buckets;
+	struct latency_bucket_small *asb;
+	struct latency_bucket_large *alb;
+	struct lm_buckets *pcpu_b;
+	unsigned long flags;
+	int cpu;
+	struct latency_model_params *old_params, *new_params;
+
+	spin_lock_irqsave(&model->update_lock, flags);
+
+	old_params = rcu_dereference_protected(model->params,
+		lockdep_is_held(&model->update_lock));
+	new_params = kmemdup(old_params, sizeof(*new_params), GFP_ATOMIC);
+	if (!new_params) {
+		spin_unlock_irqrestore(&model->update_lock, flags);
+		return;
+	}
+
+	// Aggregate data from all CPUs and reset per-cpu buckets.
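+	// Each sample is counted exactly once: the per-CPU buckets are
+	// drained into ad->aggr_buckets below and zeroed right after
+	// being read.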
+ for_each_possible_cpu(cpu) { + pcpu_b = per_cpu_ptr(model->pcpu_buckets, cpu); + + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + if (pcpu_b->small_bucket[i].sum_of_weights) { + asb = &aggr->small_bucket[i]; + asb->sum_of_weights += + pcpu_b->small_bucket[i].sum_of_weights; + asb->weighted_sum_latency += + pcpu_b->small_bucket[i].weighted_sum_latency; + } + if (pcpu_b->large_bucket[i].sum_of_weights) { + alb = &aggr->large_bucket[i]; + alb->sum_of_weights += + pcpu_b->large_bucket[i].sum_of_weights; + alb->weighted_sum_latency += + pcpu_b->large_bucket[i].weighted_sum_latency; + alb->weighted_sum_block_size += + pcpu_b->large_bucket[i].weighted_sum_block_size; + } + } + // Reset per-cpu buckets after aggregating + reset_buckets(pcpu_b); + } + + // Count the number of entries in aggregated buckets + small_weight = lm_count_small_entries(aggr->small_bucket); + large_weight = lm_count_large_entries(aggr->large_bucket); + + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!new_params->base) || + new_params->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Update small buckets + if (small_weight && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_weight || !new_params->base)) { + small_processed = lm_update_small_buckets(model, new_params, + aggr->small_bucket, small_weight, !new_params->base); + memset(&aggr->small_bucket[0], 0, sizeof(aggr->small_bucket)); + } + // Update large buckets + if (large_weight && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_weight || !new_params->slope)) { + large_processed = lm_update_large_buckets(model, new_params, + aggr->large_bucket, large_weight, !new_params->slope); + memset(&aggr->large_bucket[0], 0, sizeof(aggr->large_bucket)); + } + + // Update the base parameter if small bucket was processed + if (small_processed && likely(new_params->small_count)) + new_params->base = div_u64(new_params->small_sum_delay, + new_params->small_count); + + // Update the slope parameter if large bucket was processed + if (large_processed && likely(new_params->large_sum_bsize)) + new_params->slope = div_u64(new_params->large_sum_delay, + DIV_ROUND_UP_ULL(new_params->large_sum_bsize, 1024)); + + // Update last updated jiffies if update happened or time has elapsed + if (small_processed || large_processed || time_elapsed) + new_params->last_update_jiffies = now; + + rcu_assign_pointer(model->params, new_params); + spin_unlock_irqrestore(&model->update_lock, flags); + + kfree_rcu(old_params, rcu); +} + +// Determine the bucket index for a given measured and predicted latency +static u8 lm_input_bucket_index(u64 measured, u64 predicted) { + u8 bucket_index; + + if (measured < predicted * 2) + bucket_index = div_u64((measured * 20), predicted); + else if (measured < predicted * 5) + bucket_index = div_u64((measured * 10), predicted) + 20; + else + bucket_index = div_u64((measured * 3), predicted) + 40; + + return bucket_index; +} + +// Input latency data into the latency model +static void latency_model_input(struct adios_data *ad, + struct latency_model *model, + u32 block_size, u64 latency, u64 pred_lat, u32 weight) { + unsigned long flags; + u8 bucket_index; + struct lm_buckets *buckets; + u64 current_base; + struct latency_model_params *params; + + local_irq_save(flags); + buckets = per_cpu_ptr(model->pcpu_buckets, __smp_processor_id()); + + rcu_read_lock(); + params = rcu_dereference(model->params); + current_base = params->base; + rcu_read_unlock(); + + if (block_size <= 
LM_BLOCK_SIZE_THRESHOLD) {
+		// Handle small requests
+		bucket_index = lm_input_bucket_index(latency, current_base ?: 1);
+
+		if (bucket_index >= LM_LAT_BUCKET_COUNT)
+			bucket_index = LM_LAT_BUCKET_COUNT - 1;
+
+		buckets->small_bucket[bucket_index].sum_of_weights += weight;
+		buckets->small_bucket[bucket_index].weighted_sum_latency +=
+			latency * weight;
+
+		local_irq_restore(flags);
+
+		if (unlikely(!current_base)) {
+			latency_model_update(ad, model);
+			return;
+		}
+	} else {
+		// Handle large requests
+		if (!current_base || !pred_lat) {
+			local_irq_restore(flags);
+			return;
+		}
+
+		bucket_index = lm_input_bucket_index(latency, pred_lat);
+
+		if (bucket_index >= LM_LAT_BUCKET_COUNT)
+			bucket_index = LM_LAT_BUCKET_COUNT - 1;
+
+		buckets->large_bucket[bucket_index].sum_of_weights += weight;
+		buckets->large_bucket[bucket_index].weighted_sum_latency +=
+			latency * weight;
+		buckets->large_bucket[bucket_index].weighted_sum_block_size +=
+			block_size * weight;
+
+		local_irq_restore(flags);
+	}
+}
+
+// Predict the latency for a given block size using the latency model
+static u64 latency_model_predict(struct latency_model *model, u32 block_size) {
+	u64 result;
+	struct latency_model_params *params;
+
+	rcu_read_lock();
+	params = rcu_dereference(model->params);
+
+	result = params->base;
+	if (block_size > LM_BLOCK_SIZE_THRESHOLD)
+		result += params->slope *
+			DIV_ROUND_UP_ULL(block_size - LM_BLOCK_SIZE_THRESHOLD, 1024);
+
+	rcu_read_unlock();
+
+	return result;
+}
+
+// Determine the type of operation based on request flags
+static u8 adios_optype(struct request *rq) {
+	switch (rq->cmd_flags & REQ_OP_MASK) {
+	case REQ_OP_READ:
+		return ADIOS_READ;
+	case REQ_OP_WRITE:
+		return ADIOS_WRITE;
+	case REQ_OP_DISCARD:
+		return ADIOS_DISCARD;
+	default:
+		return ADIOS_OTHER;
+	}
+}
+
+static inline u8 adios_optype_not_read(struct request *rq) {
+	return (rq->cmd_flags & REQ_OP_MASK) != REQ_OP_READ;
+}
+
+// Helper function to retrieve adios_rq_data from a request
+static inline struct adios_rq_data *get_rq_data(struct request *rq) {
+	return rq->elv.priv[0];
+}
+
+static inline
+void set_adios_state(struct adios_data *ad, u32 shift, u32 idx, bool flag) {
+	if (flag)
+		atomic_or(1U << (idx + shift), &ad->state);
+	else
+		atomic_andnot(1U << (idx + shift), &ad->state);
+}
+
+static inline u32 get_adios_state(struct adios_data *ad)
+{ return atomic_read(&ad->state); }
+
+static inline u32 eval_this_adios_state(u32 state, u32 shift)
+{ return (state >> shift) & 0x3; }
+
+static inline u32 eval_adios_state(struct adios_data *ad, u32 shift)
+{ return eval_this_adios_state(get_adios_state(ad), shift); }
+
+// Add a request to the deadline-sorted red-black tree
+static void add_to_dl_tree(
+	struct adios_data *ad, bool dl_idx, struct request *rq) {
+	struct rb_root_cached *root = &ad->dl_tree[dl_idx];
+	struct rb_node **link = &(root->rb_root.rb_node), *parent = NULL;
+	bool leftmost = true;
+	struct adios_rq_data *rd = get_rq_data(rq);
+	struct dl_group *dlg;
+	u64 deadline;
+	bool was_empty = RB_EMPTY_ROOT(&root->rb_root);
+
+	/* Tier-2: Synchronous Requests
+	 * - Need to be FIFO within the same optype
+	 * - Relaxed order between different optypes
+	 * - Basically need to be processed early */
+	rd->deadline = rq->start_time_ns;
+
+	/* Tier-3: Asynchronous Requests
+	 * - Can be reordered and delayed freely */
+	if (!(rq->cmd_flags & REQ_SYNC)) {
+		rd->deadline += ad->latency_target[adios_optype(rq)];
+		if (!compliant(ad, ADIOS_CF_FIXORDER))
+			rd->deadline += rd->pred_lat;
+	}
+
+	// Now quantize
the deadline (-> dlg->deadline == RB-Tree key)
+	deadline = rd->deadline & ~((1ULL << ADIOS_QUANTUM_SHIFT) - 1);
+
+	while (*link) {
+		dlg = rb_entry(*link, struct dl_group, node);
+		s64 diff = deadline - dlg->deadline;
+
+		parent = *link;
+		if (diff < 0) {
+			link = &((*link)->rb_left);
+		} else if (diff > 0) {
+			link = &((*link)->rb_right);
+			leftmost = false;
+		} else { // diff == 0
+			goto found;
+		}
+	}
+
+	dlg = rb_entry_safe(parent, struct dl_group, node);
+	if (!dlg || dlg->deadline != deadline) {
+		dlg = kmem_cache_zalloc(ad->dl_group_pool, GFP_ATOMIC);
+		if (!dlg)
+			return;
+		dlg->deadline = deadline;
+		INIT_LIST_HEAD(&dlg->rqs);
+		rb_link_node(&dlg->node, parent, link);
+		rb_insert_color_cached(&dlg->node, root, leftmost);
+	}
+found:
+	list_add_tail(&rd->dl_node, &dlg->rqs);
+	rd->dl_group = &dlg->rqs;
+
+	if (was_empty)
+		set_adios_state(ad, ADIOS_STATE_DL, dl_idx, true);
+}
+
+// Remove a request from the deadline-sorted red-black tree
+static void del_from_dl_tree(
+	struct adios_data *ad, bool dl_idx, struct request *rq) {
+	struct rb_root_cached *root = &ad->dl_tree[dl_idx];
+	struct adios_rq_data *rd = get_rq_data(rq);
+	struct dl_group *dlg = container_of(rd->dl_group, struct dl_group, rqs);
+
+	list_del_init(&rd->dl_node);
+	if (list_empty(&dlg->rqs)) {
+		rb_erase_cached(&dlg->node, root);
+		kmem_cache_free(ad->dl_group_pool, dlg);
+	}
+	rd->dl_group = NULL;
+
+	if (RB_EMPTY_ROOT(&ad->dl_tree[dl_idx].rb_root))
+		set_adios_state(ad, ADIOS_STATE_DL, dl_idx, false);
+}
+
+// Remove a request from the scheduler
+static void remove_request(struct adios_data *ad, struct request *rq) {
+	bool dl_idx = adios_optype_not_read(rq);
+	struct request_queue *q = rq->q;
+	struct adios_rq_data *rd = get_rq_data(rq);
+
+	list_del_init(&rq->queuelist);
+
+	// We might not be on the rbtree, if we are doing an insert merge
+	if (rd->dl_group)
+		del_from_dl_tree(ad, dl_idx, rq);
+
+	elv_rqhash_del(q, rq);
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+}
+
+// Convert a queue depth to the corresponding word depth for shallow allocation
+static int to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) {
+	struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
+	const unsigned int nrr = hctx->queue->nr_requests;
+
+	return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
+}
+
+// We limit the depth of request allocation for asynchronous and write requests
+static void adios_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) {
+	struct adios_data *ad = data->q->elevator->elevator_data;
+
+	// Do not throttle synchronous reads
+	if (op_is_sync(opf) && !op_is_write(opf))
+		return;
+
+	data->shallow_depth = to_word_depth(data->hctx, ad->async_depth);
+}
+
+// Called by the block layer when the number of requests in the queue changes
+static void adios_depth_updated(struct request_queue *q) {
+	struct adios_data *ad = q->elevator->elevator_data;
+
+	ad->async_depth = q->nr_requests;
+	blk_mq_set_min_shallow_depth(q, 1);
+}
+
+// Handle request merging after a merge operation
+static void adios_request_merged(struct request_queue *q, struct request *req,
+		enum elv_merge type) {
+	bool dl_idx = adios_optype_not_read(req);
+	struct adios_data *ad = q->elevator->elevator_data;
+
+	// Reposition request in the deadline-sorted tree
+	del_from_dl_tree(ad, dl_idx, req);
+	add_to_dl_tree(ad, dl_idx, req);
+}
+
+// Handle merging of requests after one has been merged into another
+static void adios_merged_requests(struct request_queue *q, struct request *req,
+		struct request *next) {
+	struct
adios_data *ad = q->elevator->elevator_data; + + lockdep_assert_held(&ad->lock); + + // kill knowledge of next, this one is a goner + remove_request(ad, next); +} + +// Try to merge a bio into an existing rq before associating it with an rq +static bool adios_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { + unsigned long flags; + struct adios_data *ad = q->elevator->elevator_data; + struct request *free = NULL; + bool ret; + + if (eval_adios_state(ad, ADIOS_STATE_BP)) + return false; + + if (!spin_trylock_irqsave(&ad->lock, flags)) + return false; + + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + spin_unlock_irqrestore(&ad->lock, flags); + + if (free) + blk_mq_free_request(free); + + return ret; +} + +static bool merge_or_insert_to_dl_tree(struct adios_data *ad, + struct request *rq, struct request_queue *q, struct list_head *free) { + if (blk_mq_sched_try_insert_merge(q, rq, free)) + return true; + + bool dl_idx = adios_optype_not_read(rq); + add_to_dl_tree(ad, dl_idx, rq); + + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } + + return false; +} + +static void insert_to_prio_queue(struct adios_data *ad, + struct request *rq, bool pq_idx) { + struct adios_rq_data *rd = get_rq_data(rq); + + /* We're sure that rd->managed == true */ + union adios_in_flight_rqs ifr = { + .count = 1, + .total_pred_lat = rd->pred_lat, + }; + atomic64_add(ifr.scalar, &ad->in_flight_rqs.atomic); + + scoped_guard(spinlock_irqsave, &ad->pq_lock) { + bool was_empty = list_empty(&ad->prio_queue[pq_idx]); + list_add_tail(&rq->queuelist, &ad->prio_queue[pq_idx]); + if (was_empty) + set_adios_state(ad, ADIOS_STATE_PQ, pq_idx, true); + } +} + +// Insert a request into the scheduler (after Read & Write models stabilized) +static void insert_request_post_stability(struct blk_mq_hw_ctx *hctx, + struct request *rq, blk_insert_t insert_flags, struct list_head *free) { + struct request_queue *q = hctx->queue; + struct adios_data *ad = q->elevator->elevator_data; + struct adios_rq_data *rd = get_rq_data(rq); + u8 optype = adios_optype(rq); + bool rq_is_flush; + + rd->managed = true; + rd->block_size = blk_rq_bytes(rq); + rd->pred_lat = + latency_model_predict(&ad->latency_model[optype], rd->block_size); + + /* Tier-0: BLK_MQ_INSERT_AT_HEAD Requests */ + if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { + insert_to_prio_queue(ad, rq, 0); + return; + } + + /* + * Strict Barrier Handling for REQ_OP_FLUSH: + * If a flush request arrives, or if the scheduler is already in a + * barrier-pending state, all subsequent requests are diverted to a + * separate barrier_queue. This ensures that no new requests are processed + * until all work preceding the barrier is complete. 
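+	 *
+	 * For example, given the arrival order W1, FLUSH, W2: W1 is queued
+	 * normally, the FLUSH sets the barrier-pending state and is parked
+	 * in barrier_queue, and W2 is parked behind it until the active
+	 * queues drain and release_barrier_requests() runs.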
+	 */
+	rq_is_flush = rq->cmd_flags & REQ_OP_FLUSH;
+	if (eval_adios_state(ad, ADIOS_STATE_BP) || rq_is_flush) {
+		scoped_guard(spinlock_irqsave, &ad->barrier_lock) {
+			if (rq_is_flush)
+				set_adios_state(ad, ADIOS_STATE_BP, 0, true);
+			list_add_tail(&rq->queuelist, &ad->barrier_queue);
+		}
+		return;
+	}
+
+	if (merge_or_insert_to_dl_tree(ad, rq, q, free))
+		return;
+}
+
+// Insert a request into the scheduler (before Read & Write models stabilize)
+static void insert_request_pre_stability(struct blk_mq_hw_ctx *hctx,
+	struct request *rq, blk_insert_t insert_flags, struct list_head *free) {
+	struct adios_data *ad = hctx->queue->elevator->elevator_data;
+	struct adios_rq_data *rd = get_rq_data(rq);
+	u8 optype = adios_optype(rq);
+	u8 pq_idx = !(insert_flags & BLK_MQ_INSERT_AT_HEAD);
+	bool models_stable = false;
+
+	rd->managed = true;
+	rd->block_size = blk_rq_bytes(rq);
+	rd->pred_lat =
+		latency_model_predict(&ad->latency_model[optype], rd->block_size);
+
+	insert_to_prio_queue(ad, rq, pq_idx);
+
+	rcu_read_lock();
+	if (rcu_dereference(ad->latency_model[ADIOS_READ].params)->base > 0 &&
+	    rcu_dereference(ad->latency_model[ADIOS_WRITE].params)->base > 0)
+		models_stable = true;
+	rcu_read_unlock();
+
+	if (models_stable)
+		ad->insert_request_fn = insert_request_post_stability;
+}
+
+// Insert multiple requests into the scheduler
+static void adios_insert_requests(struct blk_mq_hw_ctx *hctx,
+		struct list_head *list,
+		blk_insert_t insert_flags) {
+	struct request_queue *q = hctx->queue;
+	struct adios_data *ad = q->elevator->elevator_data;
+	struct request *rq;
+	bool stop = false;
+	LIST_HEAD(free);
+
+	do {
+		scoped_guard(spinlock_irqsave, &ad->lock)
+		for (int i = 0; i < ADIOS_MAX_INSERTS_PER_LOCK; i++) {
+			if (list_empty(list)) {
+				stop = true;
+				break;
+			}
+			rq = list_first_entry(list, struct request, queuelist);
+			list_del_init(&rq->queuelist);
+			ad->insert_request_fn(hctx, rq, insert_flags, &free);
+		}} while (!stop);
+
+	blk_mq_free_requests(&free);
+}
+
+// Prepare a request before it is inserted into the scheduler
+static void adios_prepare_request(struct request *rq) {
+	struct adios_data *ad = rq->q->elevator->elevator_data;
+	struct adios_rq_data *rd = get_rq_data(rq);
+
+	rq->elv.priv[0] = NULL;
+
+	/* Allocate adios_rq_data from the memory pool */
+	rd = kmem_cache_zalloc(ad->rq_data_pool, GFP_ATOMIC);
+	if (WARN(!rd, "adios_prepare_request: "
+		"Failed to allocate memory from rq_data_pool. rd is NULL\n"))
+		return;
+
+	rd->rq = rq;
+	rq->elv.priv[0] = rd;
+}
+
+static struct adios_rq_data *get_dl_first_rd(struct adios_data *ad, bool idx) {
+	struct rb_root_cached *root = &ad->dl_tree[idx];
+	struct rb_node *first = rb_first_cached(root);
+	struct dl_group *dl_group = rb_entry(first, struct dl_group, node);
+
+	return list_first_entry(&dl_group->rqs, struct adios_rq_data, dl_node);
+}
+
+// Comparison function for sorting requests by block address
+static int cmp_rq_pos(void *priv,
+		const struct list_head *a, const struct list_head *b) {
+	struct request *rq_a = list_entry(a, struct request, queuelist);
+	struct request *rq_b = list_entry(b, struct request, queuelist);
+	u64 pos_a = blk_rq_pos(rq_a);
+	u64 pos_b = blk_rq_pos(rq_b);
+
+	return (int)(pos_a > pos_b) - (int)(pos_a < pos_b);
+}
+
+#ifndef list_last_entry_or_null
+#define list_last_entry_or_null(ptr, type, member) \
+	(!list_empty(ptr) ?
list_last_entry(ptr, type, member) : NULL) +#endif + +// Update the elevator direction +static void update_elv_direction(struct adios_data *ad) { + if (!ad->is_rotational) + return; + + bool page = ad->bq_page; + struct list_head *q = &ad->batch_queue[page][1]; + if (ad->bq_batch_order[page] < ADIOS_BO_ELEVATOR || list_empty(q)) { + ad->elv_direction = 0; + return; + } + + // Get first and last request positions in the queue + struct request *rq_a = list_first_entry(q, struct request, queuelist); + struct request *rq_b = list_last_entry (q, struct request, queuelist); + u64 pos_a = blk_rq_pos(rq_a); + u64 pos_b = blk_rq_pos(rq_b); + u64 avg_rq_pos = (pos_a + pos_b) >> 1; + + ad->elv_direction = !!(ad->head_pos > avg_rq_pos); +} + +// Fill the batch queues with requests from the deadline-sorted red-black tree +static bool fill_batch_queues(struct adios_data *ad, u64 tpl) { + struct adios_rq_data *rd; + struct request *rq; + struct list_head *dest_q; + u8 dest_idx; + u64 added_lat = 0; + u32 optype_count[ADIOS_OPTYPES] = {0}; + u32 count = 0; + u8 optype; + bool page = !ad->bq_page, dl_idx, bias_idx, update_bias; + u32 dl_queued; + u8 bq_batch_order; + bool stop = false; + + // Reset batch queue counts for the back page + memset(&ad->batch_count[page], 0, sizeof(ad->batch_count[page])); + + ad->bq_batch_order[page] = + bq_batch_order = ad->batch_order; + + do { + scoped_guard(spinlock_irqsave, &ad->lock) + for (int i = 0; i < ADIOS_MAX_DELETES_PER_LOCK; i++) { + bool has_base = false; + + dl_queued = eval_adios_state(ad, ADIOS_STATE_DL); + // Check if there are any requests queued in the deadline tree + if (!dl_queued) { + stop = true; + break; + } + + // Reads if both queues have requests, otherwise pick the non-empty. + dl_idx = dl_queued >> 1; + + // Get the first request from the deadline-sorted tree + rd = get_dl_first_rd(ad, dl_idx); + + bias_idx = ad->dl_bias < 0; + // If read and write requests are queued, choose one based on bias + if (dl_queued == 0x3) { + struct adios_rq_data *trd[2] = {get_dl_first_rd(ad, 0), rd}; + rd = trd[bias_idx]; + + update_bias = (trd[bias_idx]->deadline > trd[!bias_idx]->deadline); + } else + update_bias = (bias_idx == dl_idx); + + rq = rd->rq; + optype = adios_optype(rq); + + rcu_read_lock(); + has_base = + !!rcu_dereference(ad->latency_model[optype].params)->base; + rcu_read_unlock(); + + // Check batch size and total predicted latency + if (count && (!has_base || + ad->batch_count[page][optype] >= ad->batch_limit[optype] || + (tpl + added_lat + rd->pred_lat) > ad->global_latency_window)) { + stop = true; + break; + } + + if (update_bias) { + s64 sign = ((s64)bias_idx << 1) - 1; + if (unlikely(!rd->pred_lat)) + ad->dl_bias = sign; + else + // Adjust the bias based on the predicted latency + ad->dl_bias += sign * (s64)((rd->pred_lat * + adios_prio_to_wmult[ad->dl_prio[bias_idx] + 20]) >> 10); + } + + remove_request(ad, rq); + + // Add request to the corresponding batch queue + dest_idx = (bq_batch_order == ADIOS_BO_OPTYPE || optype == ADIOS_OTHER)? 
+ optype : !!(rd->deadline != rq->start_time_ns); + dest_q = &ad->batch_queue[page][dest_idx]; + list_add_tail(&rq->queuelist, dest_q); + ad->bq_state[page] |= 1U << dest_idx; + ad->batch_count[page][optype]++; + optype_count[optype]++; + added_lat += rd->pred_lat; + count++; + }} while (!stop); + + if (bq_batch_order == ADIOS_BO_ELEVATOR && ad->batch_count[page][1] > 1) + list_sort(NULL, &ad->batch_queue[page][1], cmp_rq_pos); + + if (count) { + /* We're sure that every request's rd->managed == true */ + union adios_in_flight_rqs ifr = { + .count = count, + .total_pred_lat = added_lat, + }; + atomic64_add(ifr.scalar, &ad->in_flight_rqs.atomic); + + set_adios_state(ad, ADIOS_STATE_BQ, page, true); + + for (optype = 0; optype < ADIOS_OPTYPES; optype++) + if (ad->batch_actual_max_size[optype] < optype_count[optype]) + ad->batch_actual_max_size[optype] = optype_count[optype]; + if (ad->batch_actual_max_total < count) + ad->batch_actual_max_total = count; + } + return count; +} + +// Flip to the next batch queue page +static void flip_bq_page(struct adios_data *ad) { + ad->bq_page = !ad->bq_page; + update_elv_direction(ad); +} + +// Pop a request from the specified index (optype or elevator tier) +static inline struct request *pop_bq_request( + struct adios_data *ad, u8 idx, bool direction) { + bool page = ad->bq_page; + struct list_head *q = &ad->batch_queue[page][idx]; + struct request *rq = direction ? + list_last_entry_or_null (q, struct request, queuelist): + list_first_entry_or_null(q, struct request, queuelist); + if (rq) { + list_del_init(&rq->queuelist); + if (list_empty(q)) + ad->bq_state[page] &= ~(1U << idx); + } + return rq; +} + +static struct request *pop_next_bq_request_optype(struct adios_data *ad) { + u32 bq_state = ad->bq_state[ad->bq_page]; + if (!bq_state) return NULL; + + struct request *rq; + u32 bq_idx = __builtin_ctz(bq_state); + + // Dispatch based on optype (FIFO within each) or single-queue elevator + rq = pop_bq_request(ad, bq_idx, false); + return rq; +} + +static struct request *pop_next_bq_request_elevator(struct adios_data *ad) { + u32 bq_state = ad->bq_state[ad->bq_page]; + if (!bq_state) return NULL; + + struct request *rq; + u32 bq_idx = __builtin_ctz(bq_state); + bool direction = (bq_idx == 1) & ad->elv_direction; + + // Tier-2 (sync) is always high priority + // Tier-3 (async) uses the pre-calculated elevator direction + rq = pop_bq_request(ad, bq_idx, direction); + + /* If batch queue for the sync requests just became empty */ + if (bq_idx == 0 && rq && !(bq_state & 0x1)) + update_elv_direction(ad); + + return rq; +} + +// Returns the state of the batch queue page +static inline bool bq_page_has_rq(u32 bq_state, bool page) { + return bq_state & (1U << page); +} + +// Dispatch a request from the batch queues +static struct request *dispatch_from_bq(struct adios_data *ad) { + struct request *rq; + + guard(spinlock_irqsave)(&ad->bq_lock); + + u32 state = get_adios_state(ad); + u32 bq_state = eval_this_adios_state(state, ADIOS_STATE_BQ); + u32 bq_curr_page_has_rq = bq_page_has_rq(bq_state, ad->bq_page); + union adios_in_flight_rqs ifr; + ifr.scalar = atomic64_read(&ad->in_flight_rqs.atomic); + u64 tpl = ifr.total_pred_lat; + + // Refill the batch queues if the back page is empty, dl_tree has work, and + // current page is empty or the total ongoing latency is below the threshold + if (!bq_page_has_rq(bq_state, !ad->bq_page) && + (!bq_curr_page_has_rq || (!tpl || tpl < div_u64( + ad->global_latency_window * ad->bq_refill_below_ratio, 100))) && + 
eval_this_adios_state(state, ADIOS_STATE_DL)) + fill_batch_queues(ad, tpl); + + // If current batch queue page is empty, and the other page has work, flip + if (!bq_curr_page_has_rq && + bq_page_has_rq(eval_adios_state(ad, ADIOS_STATE_BQ), !ad->bq_page)) + flip_bq_page(ad); + + // Use the per-page state to decide the dispatch logic, ensuring correctness + rq = (ad->bq_batch_order[ad->bq_page] == ADIOS_BO_ELEVATOR) ? + pop_next_bq_request_elevator(ad): + pop_next_bq_request_optype(ad); + + if (rq) { + bool page = ad->bq_page; + bool is_empty = !ad->bq_state[page]; + if (is_empty) + set_adios_state(ad, ADIOS_STATE_BQ, page, false); + return rq; + } + + return NULL; +} + +// Dispatch a request from the priority queue +static struct request *dispatch_from_pq(struct adios_data *ad) { + struct request *rq = NULL; + + guard(spinlock_irqsave)(&ad->pq_lock); + u32 pq_state = eval_adios_state(ad, ADIOS_STATE_PQ); + u8 pq_idx = pq_state >> 1; + struct list_head *q = &ad->prio_queue[pq_idx]; + + if (unlikely(list_empty(q))) return NULL; + + rq = list_first_entry(q, struct request, queuelist); + list_del_init(&rq->queuelist); + if (list_empty(q)) { + set_adios_state(ad, ADIOS_STATE_PQ, pq_idx, false); + update_elv_direction(ad); + } + return rq; +} + +static bool release_barrier_requests(struct adios_data *ad) { + u32 moved_count = 0; + LIST_HEAD(local_list); + + scoped_guard(spinlock_irqsave, &ad->barrier_lock) { + if (!list_empty(&ad->barrier_queue)) { + struct request *trq, *next; + bool first_barrier_moved = false; + + list_for_each_entry_safe(trq, next, &ad->barrier_queue, queuelist) { + if (!first_barrier_moved) { + list_del_init(&trq->queuelist); + insert_to_prio_queue(ad, trq, 1); + moved_count++; + first_barrier_moved = true; + continue; + } + + if (trq->cmd_flags & REQ_OP_FLUSH) + break; + + list_move_tail(&trq->queuelist, &local_list); + moved_count++; + } + + if (list_empty(&ad->barrier_queue)) + set_adios_state(ad, ADIOS_STATE_BP, 0, false); + } + } + + if (!moved_count) + return false; + + if (!list_empty(&local_list)) { + struct request *trq, *next; + LIST_HEAD(free_list); + + /* ad->lock is already held */ + list_for_each_entry_safe(trq, next, &local_list, queuelist) { + list_del_init(&trq->queuelist); + if (merge_or_insert_to_dl_tree(ad, trq, ad->queue, &free_list)) + continue; + } + + if (!list_empty(&free_list)) + blk_mq_free_requests(&free_list); + } + + return true; +} + +// Dispatch a request to the hardware queue +static struct request *adios_dispatch_request(struct blk_mq_hw_ctx *hctx) { + struct adios_data *ad = hctx->queue->elevator->elevator_data; + struct request *rq; + +retry: + rq = dispatch_from_pq(ad); + if (rq) + goto found; + + rq = dispatch_from_bq(ad); + if (rq) + goto found; + + /* + * If all active queues are empty, check if we need to process a barrier. + * This is the trigger to release requests that were held in barrier_queue + * due to a REQ_OP_FLUSH barrier. 
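+	 * The release runs under ad->lock, and a successful release jumps
+	 * back to the top of the dispatch sequence so that the released
+	 * requests are considered immediately.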
+ */ + if (eval_adios_state(ad, ADIOS_STATE_BP)) { + bool barrier_released = false; + scoped_guard(spinlock_irqsave, &ad->lock) + barrier_released = release_barrier_requests(ad); + if (barrier_released) + goto retry; + } + + return NULL; +found: + if (ad->is_rotational) + ad->head_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + + rq->rq_flags |= RQF_STARTED; + return rq; +} + +// Timer callback function to periodically update latency models +static void update_timer_callback(struct timer_list *t) { + struct adios_data *ad = timer_container_of(ad, t, update_timer); + + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) + latency_model_update(ad, &ad->latency_model[optype]); +} + +// Handle the completion of a request +static void adios_completed_request(struct request *rq, u64 now) { + struct adios_data *ad = rq->q->elevator->elevator_data; + struct adios_rq_data *rd = get_rq_data(rq); + union adios_in_flight_rqs ifr = { .scalar = 0 }; + + if (rd->managed) { + union adios_in_flight_rqs ifr_to_sub = { + .count = 1, + .total_pred_lat = rd->pred_lat, + }; + ifr.scalar = atomic64_sub_return( + ifr_to_sub.scalar, &ad->in_flight_rqs.atomic); + } + u8 optype = adios_optype(rq); + + if (optype == ADIOS_OTHER) { + // Non-positional commands make the head position unpredictable. + // Invalidate our knowledge of the last completed position. + if (ad->is_rotational) + ad->last_completed_pos = 0; + return; + } + + u64 lct = ad->last_completed_time ?: rq->io_start_time_ns; + ad->last_completed_time = (ifr.count) ? now : 0; + + if (!rq->io_start_time_ns || !rd->block_size || unlikely(now < lct)) + return; + + u64 latency = now - lct; + if (latency > ad->lat_model_latency_limit) + return; + + u32 weight = 1; + if (ad->is_rotational) { + sector_t current_pos = blk_rq_pos(rq); + // Only calculate seek distance if we have a valid last position. + if (ad->last_completed_pos > 0) { + u64 seek_distance = abs( + (s64)current_pos - (s64)ad->last_completed_pos); + weight = 65 - __builtin_clzll(seek_distance); + } + // Update (or re-synchronize) our knowledge of the head position. 
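+		// Note: the sample weight above grows with log2 of the seek
+		// distance: a 1-sector seek yields weight 2, while a seek of
+		// roughly a million sectors (2^20) yields weight 22.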
+ ad->last_completed_pos = current_pos + blk_rq_sectors(rq); + } + + latency_model_input(ad, &ad->latency_model[optype], + rd->block_size, latency, rd->pred_lat, weight); + timer_reduce(&ad->update_timer, jiffies + msecs_to_jiffies(100)); +} + +// Clean up after a request is finished +static void adios_finish_request(struct request *rq) { + struct adios_data *ad = rq->q->elevator->elevator_data; + + if (rq->elv.priv[0]) { + // Free adios_rq_data back to the memory pool + kmem_cache_free(ad->rq_data_pool, get_rq_data(rq)); + rq->elv.priv[0] = NULL; + } +} + +// Check if there are any requests available for dispatch +static bool adios_has_work(struct blk_mq_hw_ctx *hctx) { + struct adios_data *ad = hctx->queue->elevator->elevator_data; + + return atomic_read(&ad->state) != 0; +} + +// Initialize the scheduler-specific data when initializing the request queue +static int adios_init_sched(struct request_queue *q, struct elevator_queue *eq) { + struct adios_data *ad; + int ret = -ENOMEM; + u8 optype = 0; + + ad = kzalloc_node(sizeof(*ad), GFP_KERNEL, q->node); + if (!ad) { + pr_err("adios: Failed to create adios_data\n"); + goto put_eq; + } + + eq->elevator_data = ad; + + // Create a memory pool for adios_rq_data + ad->rq_data_pool = kmem_cache_create("rq_data_pool", + sizeof(struct adios_rq_data), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ad->rq_data_pool) { + pr_err("adios: Failed to create rq_data_pool\n"); + goto free_ad; + } + + /* Create a memory pool for dl_group */ + ad->dl_group_pool = kmem_cache_create("dl_group_pool", + sizeof(struct dl_group), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ad->dl_group_pool) { + pr_err("adios: Failed to create dl_group_pool\n"); + goto destroy_rq_data_pool; + } + + for (int i = 0; i < ADIOS_PQ_LEVELS; i++) + INIT_LIST_HEAD(&ad->prio_queue[i]); + + for (u8 i = 0; i < ADIOS_DL_TYPES; i++) { + ad->dl_tree[i] = RB_ROOT_CACHED; + ad->dl_prio[i] = default_dl_prio[i]; + } + ad->dl_bias = 0; + + for (u8 page = 0; page < ADIOS_BQ_PAGES; page++) + for (optype = 0; optype < ADIOS_OPTYPES; optype++) + INIT_LIST_HEAD(&ad->batch_queue[page][optype]); + + ad->aggr_buckets = kzalloc(sizeof(*ad->aggr_buckets), GFP_KERNEL); + if (!ad->aggr_buckets) { + pr_err("adios: Failed to allocate aggregation buckets\n"); + goto destroy_dl_group_pool; + } + + for (optype = 0; optype < ADIOS_OPTYPES; optype++) { + struct latency_model *model = &ad->latency_model[optype]; + struct latency_model_params *params; + + spin_lock_init(&model->update_lock); + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) { + pr_err("adios: Failed to allocate latency_model_params\n"); + goto free_buckets; + } + params->last_update_jiffies = jiffies; + RCU_INIT_POINTER(model->params, params); + + model->pcpu_buckets = alloc_percpu(struct lm_buckets); + if (!model->pcpu_buckets) { + pr_err("adios: Failed to allocate per-CPU buckets\n"); + kfree(params); + goto free_buckets; + } + + model->lm_shrink_at_kreqs = default_lm_shrink_at_kreqs; + model->lm_shrink_at_gbytes = default_lm_shrink_at_gbytes; + model->lm_shrink_resist = default_lm_shrink_resist; + } + + for (optype = 0; optype < ADIOS_OPTYPES; optype++) { + ad->latency_target[optype] = default_latency_target[optype]; + ad->batch_limit[optype] = default_batch_limit[optype]; + } + + eq->elevator_data = ad; + + ad->is_rotational = !!(q->limits.features & BLK_FEAT_ROTATIONAL); + ad->global_latency_window = (ad->is_rotational)? 
+ default_global_latency_window_rotational: + default_global_latency_window; + ad->bq_refill_below_ratio = default_bq_refill_below_ratio; + ad->lat_model_latency_limit = default_lat_model_latency_limit; + ad->batch_order = default_batch_order; + ad->compliance_flags = default_compliance_flags; + + ad->insert_request_fn = insert_request_pre_stability; + + atomic_set(&ad->state, 0); + + spin_lock_init(&ad->lock); + spin_lock_init(&ad->pq_lock); + spin_lock_init(&ad->bq_lock); + spin_lock_init(&ad->barrier_lock); + INIT_LIST_HEAD(&ad->barrier_queue); + + timer_setup(&ad->update_timer, update_timer_callback, 0); + + /* We dispatch from request queue wide instead of hw queue */ + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + + ad->queue = q; + blk_stat_enable_accounting(q); + + q->elevator = eq; + adios_depth_updated(q); + return 0; + +free_buckets: + pr_err("adios: Failed to allocate per-cpu buckets\n"); + while (optype-- > 0) { + struct latency_model *prev_model = &ad->latency_model[optype]; + kfree(rcu_access_pointer(prev_model->params)); + free_percpu(prev_model->pcpu_buckets); + } + kfree(ad->aggr_buckets); +destroy_dl_group_pool: + kmem_cache_destroy(ad->dl_group_pool); +destroy_rq_data_pool: + kmem_cache_destroy(ad->rq_data_pool); +free_ad: + kfree(ad); +put_eq: + kobject_put(&eq->kobj); + return ret; +} + +// Clean up and free resources when exiting the scheduler +static void adios_exit_sched(struct elevator_queue *e) { + struct adios_data *ad = e->elevator_data; + + timer_shutdown_sync(&ad->update_timer); + + WARN_ON_ONCE(!list_empty(&ad->barrier_queue)); + for (int i = 0; i < 2; i++) + WARN_ON_ONCE(!list_empty(&ad->prio_queue[i])); + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + struct latency_model *model = &ad->latency_model[i]; + struct latency_model_params *params = rcu_access_pointer(model->params); + + RCU_INIT_POINTER(model->params, NULL); + kfree_rcu(params, rcu); + + free_percpu(model->pcpu_buckets); + } + + synchronize_rcu(); + + kfree(ad->aggr_buckets); + + if (ad->rq_data_pool) + kmem_cache_destroy(ad->rq_data_pool); + + if (ad->dl_group_pool) + kmem_cache_destroy(ad->dl_group_pool); + + blk_stat_disable_accounting(ad->queue); + + kfree(ad); +} + +static void sideload_latency_model( + struct latency_model *model, u64 base, u64 slope) { + struct latency_model_params *old_params, *new_params; + unsigned long flags; + + new_params = kzalloc(sizeof(*new_params), GFP_KERNEL); + if (!new_params) + return; + + spin_lock_irqsave(&model->update_lock, flags); + + old_params = rcu_dereference_protected(model->params, + lockdep_is_held(&model->update_lock)); + + new_params->last_update_jiffies = jiffies; + + // Initialize base and its statistics as a single sample. + new_params->base = base; + new_params->small_sum_delay = base; + new_params->small_count = 1; + + // Initialize slope and its statistics as a single sample. 
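+	// Seeding the sums with one synthetic sample (slope ns over 1 KiB)
+	// makes the recomputed slope equal the sideloaded value exactly.
+	// E.g. with a hypothetical base = 50000 ns and slope = 400 ns/KiB,
+	// latency_model_predict() would return 50000 ns for a 4 KiB request
+	// and 50000 + 400 * 60 = 74000 ns for a 64 KiB request.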
+ new_params->slope = slope; + new_params->large_sum_delay = slope; + new_params->large_sum_bsize = 1024; /* Corresponds to 1 KiB */ + + lm_reset_pcpu_buckets(model); + + rcu_assign_pointer(model->params, new_params); + spin_unlock_irqrestore(&model->update_lock, flags); + + kfree_rcu(old_params, rcu); +} + +// Define sysfs attributes for operation types +#define SYSFS_OPTYPE_DECL(name, optype) \ +static ssize_t adios_lat_model_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + struct latency_model *model = &ad->latency_model[optype]; \ + struct latency_model_params *params; \ + ssize_t len = 0; \ + u64 base, slope; \ + rcu_read_lock(); \ + params = rcu_dereference(model->params); \ + base = params->base; \ + slope = params->slope; \ + rcu_read_unlock(); \ + len += sprintf(page, "base : %llu ns\n", base); \ + len += sprintf(page + len, "slope: %llu ns/KiB\n", slope); \ + return len; \ +} \ +static ssize_t adios_lat_model_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + struct latency_model *model = &ad->latency_model[optype]; \ + u64 base, slope; \ + int ret; \ + ret = sscanf(page, "%llu %llu", &base, &slope); \ + if (ret != 2) \ + return -EINVAL; \ + sideload_latency_model(model, base, slope); \ + reset_buckets(ad->aggr_buckets); \ + return count; \ +} \ +static ssize_t adios_lat_target_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%llu\n", ad->latency_target[optype]); \ +} \ +static ssize_t adios_lat_target_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + unsigned long nsec; \ + int ret; \ + ret = kstrtoul(page, 10, &nsec); \ + if (ret) \ + return ret; \ + sideload_latency_model(&ad->latency_model[optype], 0, 0); \ + ad->latency_target[optype] = nsec; \ + return count; \ +} \ +static ssize_t adios_batch_limit_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%u\n", ad->batch_limit[optype]); \ +} \ +static ssize_t adios_batch_limit_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + unsigned long max_batch; \ + int ret; \ + ret = kstrtoul(page, 10, &max_batch); \ + if (ret || max_batch == 0) \ + return -EINVAL; \ + struct adios_data *ad = e->elevator_data; \ + ad->batch_limit[optype] = max_batch; \ + return count; \ +} + +SYSFS_OPTYPE_DECL(read, ADIOS_READ); +SYSFS_OPTYPE_DECL(write, ADIOS_WRITE); +SYSFS_OPTYPE_DECL(discard, ADIOS_DISCARD); + +// Show the maximum batch size actually achieved for each operation type +static ssize_t adios_batch_actual_max_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + u32 total_count, read_count, write_count, discard_count; + + total_count = ad->batch_actual_max_total; + read_count = ad->batch_actual_max_size[ADIOS_READ]; + write_count = ad->batch_actual_max_size[ADIOS_WRITE]; + discard_count = ad->batch_actual_max_size[ADIOS_DISCARD]; + + return sprintf(page, + "Total : %u\nDiscard: %u\nRead : %u\nWrite : %u\n", + total_count, discard_count, read_count, write_count); +} + +#define SYSFS_ULL_DECL(field, min_val, max_val) \ +static ssize_t adios_##field##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%llu\n", ad->field); \ +} \ 
+static ssize_t adios_##field##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + unsigned long val; \ + int ret; \ + ret = kstrtoul(page, 10, &val); \ + if (ret || val < (min_val) || val > (max_val)) \ + return -EINVAL; \ + ad->field = val; \ + return count; \ +} + +SYSFS_ULL_DECL(global_latency_window, 0, ULLONG_MAX) +SYSFS_ULL_DECL(compliance_flags, 0, ULLONG_MAX) + +#define SYSFS_INT_DECL(field, min_val, max_val) \ +static ssize_t adios_##field##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%d\n", ad->field); \ +} \ +static ssize_t adios_##field##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + int val; \ + int ret; \ + ret = kstrtoint(page, 10, &val); \ + if (ret || val < (min_val) || val > (max_val)) \ + return -EINVAL; \ + ad->field = val; \ + return count; \ +} + +SYSFS_INT_DECL(bq_refill_below_ratio, 0, 100) +SYSFS_INT_DECL(lat_model_latency_limit, 0, 2*NSEC_PER_SEC) +SYSFS_INT_DECL(batch_order, ADIOS_BO_OPTYPE, !!ad->is_rotational) + +// Show the read priority +static ssize_t adios_read_priority_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%d\n", ad->dl_prio[0]); +} + +// Set the read priority +static ssize_t adios_read_priority_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + int prio; + int ret; + + ret = kstrtoint(page, 10, &prio); + if (ret || prio < -20 || prio > 19) + return -EINVAL; + + guard(spinlock_irqsave)(&ad->lock); + ad->dl_prio[0] = prio; + ad->dl_bias = 0; + + return count; +} + +// Reset batch queue statistics +static ssize_t adios_reset_bq_stats_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long val; + int ret; + + ret = kstrtoul(page, 10, &val); + if (ret || val != 1) + return -EINVAL; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) + ad->batch_actual_max_size[i] = 0; + + ad->batch_actual_max_total = 0; + + return count; +} + +// Reset the latency model parameters or load them from user input +static ssize_t adios_reset_lat_model_store( + struct elevator_queue *e, const char *page, size_t count) +{ + struct adios_data *ad = e->elevator_data; + struct latency_model *model; + int ret; + + /* + * Differentiate between two modes based on input format: + * 1. "1": Fully reset the model (backward compatibility). + * 2. "R_base R_slope W_base W_slope D_base D_slope": Load values. + */ + if (!strchr(page, ' ')) { + // Mode 1: Full reset. + unsigned long val; + + ret = kstrtoul(page, 10, &val); + if (ret || val != 1) + return -EINVAL; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + model = &ad->latency_model[i]; + sideload_latency_model(model, 0, 0); + } + } else { + // Mode 2: Load initial values for all latency models. 
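+		// An illustrative input (hypothetical values):
+		//   "50000 400 90000 600 0 0"
+		// loads base/slope pairs for read, write and discard in turn.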
+		u64 params[3][2]; /* 0:base, 1:slope for R, W, D */
+
+		ret = sscanf(page, "%llu %llu %llu %llu %llu %llu",
+			&params[ADIOS_READ ][0], &params[ADIOS_READ ][1],
+			&params[ADIOS_WRITE ][0], &params[ADIOS_WRITE ][1],
+			&params[ADIOS_DISCARD][0], &params[ADIOS_DISCARD][1]);
+
+		if (ret != 6)
+			return -EINVAL;
+
+		for (u8 i = ADIOS_READ; i <= ADIOS_DISCARD; i++) {
+			model = &ad->latency_model[i];
+			sideload_latency_model(model, params[i][0], params[i][1]);
+		}
+	}
+	reset_buckets(ad->aggr_buckets);
+
+	return count;
+}
+
+// Show the ADIOS version
+static ssize_t adios_version_show(struct elevator_queue *e, char *page) {
+	return sprintf(page, "%s\n", ADIOS_VERSION);
+}
+
+// Define sysfs attributes for dynamic thresholds
+#define SHRINK_THRESHOLD_ATTR_RW(name, model_field, min_value, max_value) \
+static ssize_t adios_shrink_##name##_store( \
+		struct elevator_queue *e, const char *page, size_t count) { \
+	struct adios_data *ad = e->elevator_data; \
+	unsigned long val; \
+	int ret; \
+	ret = kstrtoul(page, 10, &val); \
+	if (ret || val < min_value || val > max_value) \
+		return -EINVAL; \
+	for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \
+		struct latency_model *model = &ad->latency_model[i]; \
+		unsigned long flags; \
+		spin_lock_irqsave(&model->update_lock, flags); \
+		model->model_field = val; \
+		spin_unlock_irqrestore(&model->update_lock, flags); \
+	} \
+	return count; \
+} \
+static ssize_t adios_shrink_##name##_show( \
+		struct elevator_queue *e, char *page) { \
+	struct adios_data *ad = e->elevator_data; \
+	u32 val = 0; \
+	unsigned long flags; \
+	struct latency_model *model = &ad->latency_model[0]; \
+	spin_lock_irqsave(&model->update_lock, flags); \
+	val = model->model_field; \
+	spin_unlock_irqrestore(&model->update_lock, flags); \
+	return sprintf(page, "%u\n", val); \
+}
+
+SHRINK_THRESHOLD_ATTR_RW(at_kreqs, lm_shrink_at_kreqs, 1, 100000)
+SHRINK_THRESHOLD_ATTR_RW(at_gbytes, lm_shrink_at_gbytes, 1, 1000)
+SHRINK_THRESHOLD_ATTR_RW(resist, lm_shrink_resist, 1, 3)
+
+// Define sysfs attributes
+#define AD_ATTR(name, show_func, store_func) \
+	__ATTR(name, 0644, show_func, store_func)
+#define AD_ATTR_RW(name) \
+	__ATTR(name, 0644, adios_##name##_show, adios_##name##_store)
+#define AD_ATTR_RO(name) \
+	__ATTR(name, 0444, adios_##name##_show, NULL)
+#define AD_ATTR_WO(name) \
+	__ATTR(name, 0200, NULL, adios_##name##_store)
+
+// Define sysfs attributes for ADIOS scheduler
+static struct elv_fs_entry adios_sched_attrs[] = {
+	AD_ATTR_RO(batch_actual_max),
+	AD_ATTR_RW(bq_refill_below_ratio),
+	AD_ATTR_RW(global_latency_window),
+	AD_ATTR_RW(lat_model_latency_limit),
+	AD_ATTR_RW(batch_order),
+	AD_ATTR_RW(compliance_flags),
+
+	AD_ATTR_RW(batch_limit_read),
+	AD_ATTR_RW(batch_limit_write),
+	AD_ATTR_RW(batch_limit_discard),
+
+	AD_ATTR_RW(lat_model_read),
+	AD_ATTR_RW(lat_model_write),
+	AD_ATTR_RW(lat_model_discard),
+
+	AD_ATTR_RW(lat_target_read),
+	AD_ATTR_RW(lat_target_write),
+	AD_ATTR_RW(lat_target_discard),
+
+	AD_ATTR_RW(shrink_at_kreqs),
+	AD_ATTR_RW(shrink_at_gbytes),
+	AD_ATTR_RW(shrink_resist),
+
+	AD_ATTR_RW(read_priority),
+
+	AD_ATTR_WO(reset_bq_stats),
+	AD_ATTR_WO(reset_lat_model),
+	AD_ATTR(adios_version, adios_version_show, NULL),
+
+	__ATTR_NULL
+};
+
+// Define the ADIOS scheduler type
+static struct elevator_type mq_adios = {
+	.ops = {
+		.next_request = elv_rb_latter_request,
+		.former_request = elv_rb_former_request,
+		.limit_depth = adios_limit_depth,
+		.depth_updated = adios_depth_updated,
+		.request_merged = adios_request_merged,
+		.requests_merged = adios_merged_requests,
+
.bio_merge = adios_bio_merge, + .insert_requests = adios_insert_requests, + .prepare_request = adios_prepare_request, + .dispatch_request = adios_dispatch_request, + .completed_request = adios_completed_request, + .finish_request = adios_finish_request, + .has_work = adios_has_work, + .init_sched = adios_init_sched, + .exit_sched = adios_exit_sched, + }, + .elevator_attrs = adios_sched_attrs, + .elevator_name = "adios", + .elevator_owner = THIS_MODULE, +}; +MODULE_ALIAS("mq-adios-iosched"); + +#define ADIOS_PROGNAME "Adaptive Deadline I/O Scheduler" +#define ADIOS_AUTHOR "Masahito Suzuki" + +// Initialize the ADIOS scheduler module +static int __init adios_init(void) { + printk(KERN_INFO "%s %s by %s\n", + ADIOS_PROGNAME, ADIOS_VERSION, ADIOS_AUTHOR); + return elv_register(&mq_adios); +} + +// Exit the ADIOS scheduler module +static void __exit adios_exit(void) { + elv_unregister(&mq_adios); +} + +module_init(adios_init); +module_exit(adios_exit); + +MODULE_AUTHOR(ADIOS_AUTHOR); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(ADIOS_PROGNAME); \ No newline at end of file diff --git a/block/elevator.c b/block/elevator.c index a2f8b2251dc6..06cabf5b356d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -739,6 +739,19 @@ void elevator_set_default(struct request_queue *q) if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return; +#ifdef CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + ctx.name = "adios"; +#else // !CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + bool is_sq = q->nr_hw_queues == 1 || blk_mq_is_shared_tags(q->tag_set->flags); +#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) + if (is_sq) + ctx.name = "bfq"; +#else + if (!is_sq) + return; +#endif /* CONFIG_CACHY && CONFIG_IOSCHED_BFQ */ +#endif // CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + /* * For single queue devices, default to using mq-deadline. 
If we * have multiple queues or mq-deadline is not available, default @@ -748,13 +761,10 @@ void elevator_set_default(struct request_queue *q) if (!ctx.type) return; - if ((q->nr_hw_queues == 1 || - blk_mq_is_shared_tags(q->tag_set->flags))) { - err = elevator_change(q, &ctx); - if (err < 0) - pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n", - ctx.name, err); - } + err = elevator_change(q, &ctx); + if (err < 0) + pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n", + ctx.name, err); elevator_put(ctx.type); } diff --git a/drivers/Makefile b/drivers/Makefile index ccc05f1eae3e..83bb1e476a73 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,14 +64,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ -# gpu/ comes after char for AGP vs DRM startup and after iommu -obj-y += gpu/ - obj-$(CONFIG_CONNECTOR) += connector/ -# i810fb depends on char/agp/ -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ - obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ @@ -83,6 +77,13 @@ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu +obj-y += gpu/ + +# i810fb depends on char/agp/ +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 931d0081169b..02ee4c4fa5b7 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1672,7 +1672,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) } #endif -static void ahci_remap_check(struct pci_dev *pdev, int bar, +static int ahci_remap_check(struct pci_dev *pdev, int bar, struct ahci_host_priv *hpriv) { int i; @@ -1685,7 +1685,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, pci_resource_len(pdev, bar) < SZ_512K || bar != AHCI_PCI_BAR_STANDARD || !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) - return; + return 0; cap = readq(hpriv->mmio + AHCI_REMAP_CAP); for (i = 0; i < AHCI_MAX_REMAP; i++) { @@ -1700,18 +1700,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, } if (!hpriv->remapped_nvme) - return; - - dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", - hpriv->remapped_nvme); - dev_warn(&pdev->dev, - "Switch your BIOS from RAID to AHCI mode to use them.\n"); + return 0; - /* - * Don't rely on the msi-x capability in the remap case, - * share the legacy interrupt across ahci and remapped devices. 
- */ - hpriv->flags |= AHCI_HFLAG_NO_MSI; + /* Abort probe, allowing intel-nvme-remap to step in when available */ + dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); + return -ENODEV; } static int ahci_get_irq_vector(struct ata_host *host, int port) @@ -1975,7 +1968,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; /* detect remapped nvme devices */ - ahci_remap_check(pdev, ahci_pci_bar, hpriv); + rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); + if (rc) + return rc; sysfs_add_file_to_group(&pdev->dev.kobj, &dev_attr_remapped_nvme.attr, diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 2c5c228408bf..918e2bebfe78 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -9,7 +9,6 @@ config X86_INTEL_PSTATE select ACPI_PROCESSOR if ACPI select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO select CPU_FREQ_GOV_PERFORMANCE - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become @@ -39,7 +38,6 @@ config X86_AMD_PSTATE depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ec4abe374573..00c78f252e03 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3914,6 +3914,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; + else if (!strcmp(str, "enable")) + no_load = 0; else if (!strcmp(str, "active")) default_driver = &intel_pstate; else if (!strcmp(str, "passive")) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b20a06abb65d..4dd9214873c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -164,6 +164,7 @@ struct amdgpu_watchdog_timer { */ extern int amdgpu_modeset; extern unsigned int amdgpu_vram_limit; +extern int amdgpu_ignore_min_pcap; extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6ccb80e2d7c9..69ad491ded1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -150,6 +150,7 @@ enum AMDGPU_DEBUG_MASK { }; unsigned int amdgpu_vram_limit = UINT_MAX; +int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */ int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ @@ -272,6 +273,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .period = 0x0, /* default to 0x0 (timeout disable) */ }; +/** + * DOC: ignore_min_pcap (int) + * Ignore the minimum power cap. + * Useful on graphics cards where the minimum power cap is very high. + * The default is 0 (Do not ignore). + */ +MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap"); +module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600); + /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). 
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index abd3b6564373..46937e6fa78d 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -56,4 +56,10 @@ config DRM_AMD_SECURE_DISPLAY This option enables the calculation of crc of specific region via debugfs. Cooperate with specific DMCU FW. +config AMD_PRIVATE_COLOR + bool "Enable KMS color management by AMD for AMD" + default n + help + This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1ea5a250440f..6ecddd7211eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4931,7 +4931,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 1dcc79b35225..be01e61bd039 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -181,7 +181,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) return val; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /* Pre-defined Transfer Functions (TF) * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 9fcd72d87d25..6137439a7009 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -503,7 +503,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /** * dm_crtc_additional_color_mgmt - enable additional color properties * @crtc: DRM CRTC @@ -585,7 +585,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, #endif @@ -780,7 +780,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_crtc_additional_color_mgmt(&acrtc->base); #endif return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 7c4496fb4b9d..df86a28eda58 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1602,7 +1602,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_atomic_helper_plane_destroy_state(plane, state); } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR static void dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) @@ -1831,7 +1831,7 @@ static const struct drm_plane_funcs dm_plane_funcs = 
{ .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, #endif @@ -1927,7 +1927,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, else drm_plane_helper_add(plane, &dm_plane_helper_funcs); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #else res = dm_plane_init_colorops(plane); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 65296a819e6a..3d37cbefe4b8 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3342,6 +3342,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, struct device_attribute *attr, char *buf) { + if (amdgpu_ignore_min_pcap) + return sysfs_emit(buf, "%i\n", 0); + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index f51fa265230b..82aec5c85d41 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2974,7 +2974,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: - *limit = smu->min_power_limit; + if (amdgpu_ignore_min_pcap) + *limit = 0; + else + *limit = smu->min_power_limit; break; default: return -EINVAL; @@ -2995,7 +2998,15 @@ static int smu_set_power_limit(void *handle, uint32_t limit_type, uint32_t limit if (limit_type == SMU_DEFAULT_PPT_LIMIT) { if (!limit) limit = smu->current_power_limit; - if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { + + if (amdgpu_ignore_min_pcap) { + if (limit > smu->max_power_limit) { + dev_err(smu->adev->dev, + "New power limit (%d) is over the max allowed %d\n", + limit, smu->max_power_limit); + return -EINVAL; + } + } else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { dev_err(smu->adev->dev, "New power limit (%d) is out of range [%d,%d]\n", limit, smu->min_power_limit, smu->max_power_limit); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 90ff6be85cf4..15159c1cf6e1 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -46,6 +46,7 @@ struct evdev_client { struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; + struct rcu_head rcu; enum input_clock_type clk_type; bool revoked; unsigned long *evmasks[EV_CNT]; @@ -368,13 +369,22 @@ static void evdev_attach_client(struct evdev *evdev, spin_unlock(&evdev->client_lock); } +static void evdev_reclaim_client(struct rcu_head *rp) +{ + struct evdev_client *client = container_of(rp, struct evdev_client, rcu); + unsigned int i; + for (i = 0; i < EV_CNT; ++i) + bitmap_free(client->evmasks[i]); + kvfree(client); +} + static void evdev_detach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_del_rcu(&client->node); spin_unlock(&evdev->client_lock); - synchronize_rcu(); + call_rcu(&client->rcu, evdev_reclaim_client); } static int evdev_open_device(struct evdev *evdev) @@ -427,7 +437,6 @@ static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - unsigned 
int i; mutex_lock(&evdev->mutex); @@ -439,11 +448,6 @@ static int evdev_release(struct inode *inode, struct file *file) evdev_detach_client(evdev, client); - for (i = 0; i < EV_CNT; ++i) - bitmap_free(client->evmasks[i]); - - kvfree(client); - evdev_close_device(evdev); return 0; @@ -486,7 +490,6 @@ static int evdev_open(struct inode *inode, struct file *file) err_free_client: evdev_detach_client(evdev, client); - kvfree(client); return error; } diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index d50ccac9733c..bd785d6a24ba 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -40,6 +40,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + This driver creates virtual video devices: video written to such a device by one application is replayed to any application reading from it, so e.g. a screen capture or a processed stream can be fed to programs expecting a plain V4L2 webcam. + + To compile this driver as a module, choose M here: the module will be called v4l2loopback. + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 329f0eadce99..dd34e32edb02 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -34,5 +34,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEO_TUNER) += tuner.o obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 000000000000..22e270717191 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,3331 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include <linux/version.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/time.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/videodev2.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/capability.h> +#include <linux/eventpoll.h> +#include <media/v4l2-ioctl.h> +#include <media/v4l2-common.h> +#include <media/v4l2-device.h> +#include <media/v4l2-ctrls.h> +#include <media/v4l2-event.h> + +#include <linux/miscdevice.h> +#include "v4l2loopback.h" + +#define V4L2LOOPBACK_CTL_ADD_legacy 0x4C80 +#define V4L2LOOPBACK_CTL_REMOVE_legacy 0x4C81 +#define V4L2LOOPBACK_CTL_QUERY_legacy 0x4C82 + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) +#error This module is not supported on kernels before 4.0.0.
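+/* kernels older than 4.0 are rejected outright; the compatibility shims below assume a 4.x-or-newer API */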
+#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#define strscpy strlcpy +#endif + +#if defined(timer_setup) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) +#define timer_delete_sync del_timer_sync +#endif + +#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) +#define file_to_opener(ptr) \ + container_of(file_to_v4l2_fh(ptr), struct v4l2_loopback_opener, fh) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 18, 0) +#define v4l2l_f_to_opener(_file, _fh) fh_to_opener(_fh) +#define v4l2_fh_add(fh, filp) v4l2_fh_add(fh) +#define v4l2_fh_del(fh, filp) v4l2_fh_del(fh) +#else +#define v4l2l_f_to_opener(_file, _fh) file_to_opener(_file) +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig <zmoelnig@iem.at>," + "Stefan Diewald," + "Anton Novikov" + "et al."); +#ifdef SNAPSHOT_VERSION +MODULE_VERSION(__stringify(SNAPSHOT_VERSION)); +#else +MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify( + V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX)); +#endif +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define dprintk(fmt, args...) \ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \ + __LINE__, __func__, task_pid_nr(current)); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...)
\ + do { \ + if (debug > 2) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) +{ + struct timespec64 ts; + ktime_get_ts64(&ts); + + b->timestamp.tv_sec = ts.tv_sec; + b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); + b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + b->flags &= ~V4L2_BUF_FLAG_TIMESTAMP_COPY; +} + +#if BITS_PER_LONG == 32 +#include <asm/div64.h> /* do_div() for 64bit division */ +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + u64 a = (u64)A; + u32 b = B; + + if (A > 0) + return do_div(a, b); + a = -A; + return -do_div(a, b); +} +#else +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + return A % B; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +typedef unsigned __poll_t; +#endif + +/* module constants + * can be overridden during the build process using something like + * make KCPPFLAGS="-DMAX_DEVICES=100" + */ + +/* maximum number of v4l2loopback devices that can be created */ +#ifndef MAX_DEVICES +#define MAX_DEVICES 8 +#endif + +/* whether the default is to announce capabilities exclusively or not */ +#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 +#endif + +/* when a producer is considered to have gone stale */ +#ifndef MAX_TIMEOUT +#define MAX_TIMEOUT (100 * 1000) /* in msecs */ +#endif + +/* max buffers that can be mapped; in practice they + * are all mapped to max_buffers buffers */ +#ifndef MAX_BUFFERS +#define MAX_BUFFERS 32 +#endif + +/* module parameters */ +static int debug = 0; +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); + +#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 +static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; +module_param(max_buffers, int, S_IRUGO); +MODULE_PARM_DESC(max_buffers, + "how many buffers should be allocated [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); + +/* how many times a device can be opened + * the per-module default value can be overridden on a per-device basis using + * the /sys/devices interface + * + * note that max_openers should be at least 2 in order to get a working system: + * one opener for the producer and one opener for the consumer + * however, we leave that to the user + */ +#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 +static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " __stringify(
V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); +MODULE_PARM_DESC(max_width, + "maximum allowed frame width [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* frame intervals */ +#define V4L2LOOPBACK_FRAME_INTERVAL_MAX __UINT32_MAX__ +#define V4L2LOOPBACK_FPS_DEFAULT 30 +#define V4L2LOOPBACK_FPS_MAX 1000 + +/* control IDs */ +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = "timeout_image_io", + .type = V4L2_CTRL_TYPE_BUTTON, + .min = 0, + .max = 0, + .step = 0, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + atomic_t use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + + /* loopback device-specific parameters */ + char card_label[32]; + bool announce_all_caps; /* announce both OUTPUT and CAPTURE capabilities + * when true; else announce OUTPUT when no + * writer is streaming, otherwise CAPTURE. 
*/ + int max_openers; /* how many times can this device be opened */ + int min_width, max_width; + int min_height, max_height; + + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + bool pix_format_has_valid_sizeimage; + struct v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; lock the format, do not free + * on close(), and when `!announce_all_caps` do NOT + * fall back to OUTPUT when no writers attached (clear + * `keep_format` to attach a new writer) */ + int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * queue/dequeue the timeout image buffer */ + + /* buffers for OUTPUT and CAPTURE */ + u8 *image; /* pointer to actual buffers data */ + unsigned long image_size; /* number of bytes alloc'd for all buffers */ + struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + u32 buffer_count; /* should not be big, 4 is a good choice */ + u32 buffer_size; /* number of bytes alloc'd per buffer */ + u32 used_buffer_count; /* number of buffers allocated to openers */ + struct list_head outbufs_list; /* FIFO queue for OUTPUT buffers */ + u32 bufpos2index[MAX_BUFFERS]; /* mapping of `(position % used_buffers)` + * to `buffers[index]` */ + s64 write_position; /* sequence number of last 'displayed' buffer plus + * one */ + + /* synchronization between openers */ + atomic_t open_count; + struct mutex image_mutex; /* mutex for allocating image(s) and + * exchanging format tokens */ + spinlock_t lock; /* lock for the timeout and framerate timers */ + spinlock_t list_lock; /* lock for the OUTPUT buffer queue */ + wait_queue_head_t read_event; + u32 format_tokens; /* tokens to 'set format' for OUTPUT, CAPTURE, or + * timeout buffers */ + u32 stream_tokens; /* tokens to 'start' OUTPUT, CAPTURE, or timeout + * stream */ + + /* sustain framerate */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout */ + u8 *timeout_image; /* copied to outgoing buffers when timeout passes */ + struct v4l2l_buffer timeout_buffer; + u32 timeout_buffer_size; /* number bytes alloc'd for timeout buffer */ + struct timer_list timeout_timer; + int timeout_happened; +}; + +enum v4l2l_io_method { + V4L2L_IO_NONE = 0, + V4L2L_IO_MMAP = 1, + V4L2L_IO_FILE = 2, + V4L2L_IO_TIMEOUT = 3, +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + u32 format_token; /* token (if any) for type used in call to S_FMT or + * REQBUFS */ + u32 stream_token; /* token (if any) for type used in call to STREAMON */ + u32 buffer_count; /* number of buffers (if any) that opener acquired via + * REQBUFS */ + s64 read_position; /* sequence number of the next 'captured' frame */ + unsigned int reread_count; + enum v4l2l_io_method io_method; + + struct v4l2_fh fh; +}; + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +#include "v4l2loopback_formats.h" + +#ifndef V4L2_TYPE_IS_CAPTURE +#define V4L2_TYPE_IS_CAPTURE(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \ + (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) +#endif /* 
V4L2_TYPE_IS_CAPTURE */ +#ifndef V4L2_TYPE_IS_OUTPUT +#define V4L2_TYPE_IS_OUTPUT(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \ + (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) +#endif /* V4L2_TYPE_IS_OUTPUT */ + +/* token values for privilege to set format or start/stop stream */ +#define V4L2L_TOKEN_CAPTURE 0x01 +#define V4L2L_TOKEN_OUTPUT 0x02 +#define V4L2L_TOKEN_TIMEOUT 0x04 +#define V4L2L_TOKEN_MASK \ + (V4L2L_TOKEN_CAPTURE | V4L2L_TOKEN_OUTPUT | V4L2L_TOKEN_TIMEOUT) + +/* helpers for token exchange and token status */ +#define token_from_type(type) \ + (V4L2_TYPE_IS_CAPTURE(type) ? V4L2L_TOKEN_CAPTURE : V4L2L_TOKEN_OUTPUT) +#define acquire_token(dev, opener, label, token) \ + do { \ + (opener)->label##_token = token; \ + (dev)->label##_tokens &= ~token; \ + } while (0) +#define release_token(dev, opener, label) \ + do { \ + (dev)->label##_tokens |= (opener)->label##_token; \ + (opener)->label##_token = 0; \ + } while (0) +#define has_output_token(token) (token & V4L2L_TOKEN_OUTPUT) +#define has_capture_token(token) (token & V4L2L_TOKEN_CAPTURE) +#define has_no_owners(dev) ((~((dev)->format_tokens) & V4L2L_TOKEN_MASK) == 0) +#define has_other_owners(opener, dev) \ + (~((dev)->format_tokens ^ (opener)->format_token) & V4L2L_TOKEN_MASK) +#define need_timeout_buffer(dev, token) \ + ((dev)->timeout_jiffies > 0 || (token) & V4L2L_TOKEN_TIMEOUT) + +static const unsigned int FORMATS = ARRAY_SIZE(formats); + +static char *fourcc2str(unsigned int fourcc, char buf[5]) +{ + buf[0] = (fourcc >> 0) & 0xFF; + buf[1] = (fourcc >> 8) & 0xFF; + buf[2] = (fourcc >> 16) & 0xFF; + buf[3] = (fourcc >> 24) & 0xFF; + buf[4] = 0; + + return buf; +} + +static const struct v4l2l_format *format_by_fourcc(int fourcc) +{ + unsigned int i; + char buf[5]; + + for (i = 0; i < FORMATS; i++) { + if (formats[i].fourcc == fourcc) + return formats + i; + } + + dprintk("unsupported format '%4s'\n", fourcc2str(fourcc, buf)); + return NULL; +} + +static void pix_format_set_size(struct v4l2_pix_format *f, + const struct v4l2l_format *fmt, + unsigned int width, unsigned int height) +{ + f->width = width; + f->height = height; + + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + f->bytesperline = width; /* Y plane */ + f->sizeimage = (width * height * fmt->depth) >> 3; + } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { + /* doesn't make sense for compressed formats */ + f->bytesperline = 0; + f->sizeimage = (width * height * fmt->depth) >> 3; + } else { + f->bytesperline = (width * fmt->depth) >> 3; + f->sizeimage = height * f->bytesperline; + } +} + +static int v4l2l_fill_format(struct v4l2_format *fmt, const u32 minwidth, + const u32 maxwidth, const u32 minheight, + const u32 maxheight) +{ + u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height; + u32 pixelformat = fmt->fmt.pix.pixelformat; + struct v4l2_format fmt0 = *fmt; + u32 bytesperline = 0, sizeimage = 0; + + if (!width) + width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + if (!height) + height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + width = clamp_val(width, minwidth, maxwidth); + height = clamp_val(height, minheight, maxheight); + + /* sets: width,height,pixelformat,bytesperline,sizeimage */ + if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) { + fmt0.fmt.pix.bytesperline = 0; + fmt0.fmt.pix.sizeimage = 0; + } + + if (0) { + ; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + } else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width, + height)) { + ; + } else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width, + height)) { + ; +#endif + } else { + const 
struct v4l2l_format *format = + format_by_fourcc(pixelformat); + if (!format) + return -EINVAL; + pix_format_set_size(&fmt0.fmt.pix, format, width, height); + fmt0.fmt.pix.pixelformat = format->fourcc; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) { + *fmt = fmt0; + + if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field) + fmt->fmt.pix_mp.field = V4L2_FIELD_NONE; + } else { + bytesperline = fmt->fmt.pix.bytesperline; + sizeimage = fmt->fmt.pix.sizeimage; + + *fmt = fmt0; + + if (!fmt->fmt.pix.bytesperline) + fmt->fmt.pix.bytesperline = bytesperline; + if (!fmt->fmt.pix.sizeimage) + fmt->fmt.pix.sizeimage = sizeimage; + + if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix.field) + fmt->fmt.pix.field = V4L2_FIELD_NONE; + } + + return 0; +} + +/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */ +static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + const struct v4l2_format_info *info; + + info = v4l2_format_info(fmt->fmt.pix.pixelformat); + if (info && info->mem_planes == 1) + return true; +#endif + + return false; +} + +static int pix_format_eq(const struct v4l2_pix_format *ref, + const struct v4l2_pix_format *tgt, int strict) +{ + /* check if the two formats are equivalent. + * ANY fields are handled gracefully + */ +#define _pix_format_eq0(x) \ + if (ref->x != tgt->x) \ + result = 0 +#define _pix_format_eq1(x, def) \ + do { \ + if ((def != tgt->x) && (ref->x != tgt->x)) { \ + printk(KERN_INFO #x " failed"); \ + result = 0; \ + } \ + } while (0) + int result = 1; + _pix_format_eq0(width); + _pix_format_eq0(height); + _pix_format_eq0(pixelformat); + if (!strict) + return result; + _pix_format_eq1(field, V4L2_FIELD_ANY); + _pix_format_eq0(bytesperline); + _pix_format_eq0(sizeimage); + _pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT); + return result; +} + +static void set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if (!tpf->denominator && !tpf->numerator) { + tpf->numerator = 1; + tpf->denominator = V4L2LOOPBACK_FPS_DEFAULT; + } else if (tpf->numerator > + V4L2LOOPBACK_FRAME_INTERVAL_MAX * tpf->denominator) { + /* divide-by-zero or greater than maximum interval => min FPS */ + tpf->numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; + tpf->denominator = 1; + } else if (tpf->numerator * V4L2LOOPBACK_FPS_MAX < tpf->denominator) { + /* zero or lower than minimum interval => max FPS */ + tpf->numerator = 1; + tpf->denominator = V4L2LOOPBACK_FPS_MAX; + } + + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = + max(1UL, (msecs_to_jiffies(1000) * tpf->numerator) / + tpf->denominator); +} + +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. 
"YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || (has_no_owners(dev) && !dev->keep_format)) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%u", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%u/%u", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%ux%u@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + if (!dev) + return -ENODEV; + + /* only fps changing is supported */ + if (sscanf(buf, "@%u/%u", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + set_timeperframe(dev, &f); + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%u\n", dev->used_buffer_count); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + if (!dev) + return -ENODEV; + + if (dev->max_openers == curr) + return len; + + if (curr > __INT_MAX__ || dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr, + char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + if (!has_output_token(dev->stream_tokens) || dev->keep_format) { + return sprintf(buf, "capture\n"); + } else + return sprintf(buf, "output\n"); + + return -EAGAIN; +} + +static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + V4L2_SYSFS_DESTROY(state); + /* ... */ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + V4L2_SYSFS_CREATE(state); + /* ... 
*/ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* Event APIs */ + +#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START) +#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000 +#define V4L2_EVENT_PRI_CLIENT_USAGE \ + (V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1) + +struct v4l2_event_client_usage { + __u32 count; +}; + +/* global module data */ +/* find a device based on its device-number (e.g. '3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +#define v4l2loopback_get_vdev_nr(vdev) \ + ((struct v4l2loopback_private *)video_get_drvdata(vdev))->device_nr +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + int device_nr = v4l2loopback_get_vdev_nr(loopdev); + + return idr_find(&v4l2loopback_index_idr, device_nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct v4l2loopback_private *ptr = video_drvdata(f); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void client_usage_queue_event(struct video_device *vdev); +static bool any_buffers_mapped(struct v4l2_loopback_device *dev); +static int allocate_buffers(struct v4l2_loopback_device *dev, + struct v4l2_pix_format *pix_format); +static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, + u32 buffer_size); +static void free_buffers(struct v4l2_loopback_device *dev); +static int allocate_timeout_buffer(struct v4l2_loopback_device *dev); +static void free_timeout_buffer(struct v4l2_loopback_device *dev); +static void check_timers(struct v4l2_loopback_device *dev); +static const struct v4l2_file_operations v4l2_loopback_fops; +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; + +/* V4L2 ioctl caps and params calls */ +/* returns device capabilities + * called on VIDIOC_QUERYCAP + */ +static int vidioc_querycap(struct file *file, void *fh, + struct v4l2_capability *cap) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); + __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; + + strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); + snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label); + snprintf(cap->bus_info, sizeof(cap->bus_info), + "platform:v4l2loopback-%03d", device_nr); + + if (dev->announce_all_caps) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; + } else { + if (opener->io_method == V4L2L_IO_TIMEOUT || + (has_output_token(dev->stream_tokens) && + !dev->keep_format)) { + capabilities |=
V4L2_CAP_VIDEO_OUTPUT; + } else + capabilities |= V4L2_CAP_VIDEO_CAPTURE; + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + dev->vdev->device_caps = +#endif /* >=linux-4.7.0 */ + cap->device_caps = cap->capabilities = capabilities; + + cap->capabilities |= V4L2_CAP_DEVICE_CAPS; + + memset(cap->reserved, 0, sizeof(cap->reserved)); + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *fh, + struct v4l2_frmsizeenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + if (dev->keep_format || has_other_owners(opener, dev)) { + /* only current frame size supported */ + if (argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->pix_format.width; + argp->discrete.height = dev->pix_format.height; + } else { + /* return continuous sizes if pixel format is supported */ + if (NULL == format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + if (dev->min_width == dev->max_width && + dev->min_height == dev->max_height) { + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->min_width; + argp->discrete.height = dev->min_height; + } else { + argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + + argp->stepwise.min_width = dev->min_width; + argp->stepwise.min_height = dev->min_height; + + argp->stepwise.max_width = dev->max_width; + argp->stepwise.max_height = dev->max_height; + + argp->stepwise.step_width = 1; + argp->stepwise.step_height = 1; + } + } + return 0; +} + +/* Test if the device is currently 'capable' of the buffer (stream) type when + * the `exclusive_caps` parameter is set. `keep_format` should lock the format + * and prevent free of buffers */ +static int check_buffer_capability(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener, + enum v4l2_buf_type type) +{ + /* short-circuit for (non-compliant) timeout image mode */ + if (opener->io_method == V4L2L_IO_TIMEOUT) + return 0; + if (dev->announce_all_caps) + return (type == V4L2_BUF_TYPE_VIDEO_CAPTURE || + type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ? + 0 : + -EINVAL; + /* CAPTURE if opener has a capture format or a writer is streaming; + * else OUTPUT. */ + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (!(has_capture_token(opener->format_token) || + !has_output_token(dev->stream_tokens))) + return -EINVAL; + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!(has_output_token(opener->format_token) || + has_output_token(dev->stream_tokens))) + return -EINVAL; + break; + default: + return -EINVAL; + } + return 0; +} +/* returns frameinterval (fps) for the set resolution + * called on VIDIOC_ENUM_FRAMEINTERVALS + */ +static int vidioc_enum_frameintervals(struct file *file, void *fh, + struct v4l2_frmivalenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + + /* there can be only one... 
*/ + if (argp->index) + return -EINVAL; + + if (dev->keep_format || has_other_owners(opener, dev)) { + /* keep_format also locks the frame rate */ + if (argp->width != dev->pix_format.width || + argp->height != dev->pix_format.height || + argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; + argp->discrete = dev->capture_param.timeperframe; + } else { + if (argp->width < dev->min_width || + argp->width > dev->max_width || + argp->height < dev->min_height || + argp->height > dev->max_height || + !format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; + argp->stepwise.min.numerator = 1; + argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX; + argp->stepwise.max.numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; + argp->stepwise.max.denominator = 1; + argp->stepwise.step.numerator = 1; + argp->stepwise.step.denominator = 1; + } + + return 0; +} + +/* Enumerate device formats + * Returns: + * - EINVAL if the index is out of bounds, or non-zero when the format is fixed + * - EFAULT unexpected null pointer */ +static int vidioc_enum_fmt_vid(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + int fixed = dev->keep_format || has_other_owners(opener, dev); + const struct v4l2l_format *fmt; + + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + + if (!(f->index < FORMATS)) + return -EINVAL; + /* TODO: Support 6.14 V4L2_FMTDESC_FLAG_ENUM_ALL */ + if (fixed && f->index) + return -EINVAL; + + fmt = fixed ? format_by_fourcc(dev->pix_format.pixelformat) : + &formats[f->index]; + if (!fmt) + return -EFAULT; + + f->flags = 0; + if (fmt->flags & FORMAT_FLAGS_COMPRESSED) + f->flags |= V4L2_FMT_FLAG_COMPRESSED; + snprintf(f->description, sizeof(f->description), "%s", fmt->name); + f->pixelformat = fmt->fourcc; + return 0; +} + +/* Tests (or tries) the format. + * Returns: + * - EINVAL if the buffer type or format is not supported + */ +static int vidioc_try_fmt_vid(struct file *file, void *fh, + struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + if (v4l2l_fill_format(f, dev->min_width, dev->max_width, + dev->min_height, dev->max_height) != 0) + return -EINVAL; + if (dev->keep_format || has_other_owners(opener, dev)) + /* use existing format - including colorspace info */ + f->fmt.pix = dev->pix_format; + + return 0; +} + +/* Sets new format. Fills 'f' argument with the requested or existing format. + * Side-effect: buffers are allocated for the (returned) format. + * Returns: + * - EINVAL if the type is not supported + * - EBUSY if buffers are already allocated + * TODO: (vasaka) set subregions of input + */ +static int vidioc_s_fmt_vid(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + u32 token = opener->io_method == V4L2L_IO_TIMEOUT ?
+ V4L2L_TOKEN_TIMEOUT : + token_from_type(f->type); + int changed, result; + char buf[5]; + + result = vidioc_try_fmt_vid(file, fh, f); + if (result < 0) + return result; + + if (opener->buffer_count > 0) + /* must free buffers before format can be set */ + return -EBUSY; + + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + + if (opener->format_token) + release_token(dev, opener, format); + if (!(dev->format_tokens & token)) { + result = -EBUSY; + goto exit_s_fmt_unlock; + } + + dprintk("S_FMT[%s] %4s:%ux%u size=%u\n", + V4L2_TYPE_IS_CAPTURE(f->type) ? "CAPTURE" : "OUTPUT", + fourcc2str(f->fmt.pix.pixelformat, buf), f->fmt.pix.width, + f->fmt.pix.height, f->fmt.pix.sizeimage); + changed = !pix_format_eq(&dev->pix_format, &f->fmt.pix, 0); + if (changed || has_no_owners(dev)) { + result = allocate_buffers(dev, &f->fmt.pix); + if (result < 0) + goto exit_s_fmt_unlock; + } + if ((dev->timeout_image && changed) || + (!dev->timeout_image && need_timeout_buffer(dev, token))) { + result = allocate_timeout_buffer(dev); + if (result < 0) + goto exit_s_fmt_free; + } + if (changed) { + dev->pix_format = f->fmt.pix; + dev->pix_format_has_valid_sizeimage = + v4l2l_pix_format_has_valid_sizeimage(f); + } + acquire_token(dev, opener, format, token); + if (opener->io_method == V4L2L_IO_TIMEOUT) + dev->timeout_image_io = 0; + goto exit_s_fmt_unlock; +exit_s_fmt_free: + free_buffers(dev); +exit_s_fmt_unlock: + mutex_unlock(&dev->image_mutex); + return result; +} + +/* ------------------ CAPTURE ----------------------- */ +/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type + * is V4L2_BUF_TYPE_VIDEO_CAPTURE */ + +static int vidioc_enum_fmt_cap(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt_vid(file, fh, f); +} + +static int vidioc_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + f->fmt.pix = dev->pix_format; + return 0; +} + +static int vidioc_try_fmt_cap(struct file *file, void *fh, + struct v4l2_format *f) +{ + return vidioc_try_fmt_vid(file, fh, f); +} + +static int vidioc_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) +{ + return vidioc_s_fmt_vid(file, fh, f); +} + +/* ------------------ OUTPUT ----------------------- */ +/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type + * is V4L2_BUF_TYPE_VIDEO_OUTPUT */ + +static int vidioc_enum_fmt_out(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt_vid(file, fh, f); +} + +static int vidioc_g_fmt_out(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + /* + * LATER: this should return the currently valid format + * gstreamer doesn't like it, if this returns -EINVAL, as it + * then concludes that there is _no_ valid format + * CHECK whether this assumption is wrong, + * or whether we have to always provide a valid format + */ + f->fmt.pix = dev->pix_format; + return 0; +} + +static int vidioc_try_fmt_out(struct file *file, void *fh, + struct v4l2_format *f) +{ + return vidioc_try_fmt_vid(file, fh, f); +} + +static int vidioc_s_fmt_out(struct file *file, void *fh, 
struct v4l2_format *f) +{ + return vidioc_s_fmt_vid(file, fh, f); +} + +// #define V4L2L_OVERLAY +#ifdef V4L2L_OVERLAY +/* ------------------ OVERLAY ----------------------- */ +/* currently unsupported */ +/* GStreamer's v4l2sink is buggy, as it requires the overlay to work + * while it should only require it if overlay is requested + * once the gstreamer element is fixed, remove the overlay dummies + */ +#warning OVERLAY dummies +static int vidioc_g_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} + +static int vidioc_s_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} +#endif /* V4L2L_OVERLAY */ + +/* ------------------ PARAMs ----------------------- */ + +/* get some data flow parameters; only capability, fps and readbuffers have + * an effect on this driver + * called on VIDIOC_G_PARM + */ +static int vidioc_g_parm(struct file *file, void *fh, + struct v4l2_streamparm *parm) +{ + /* do not care about the type of opener; hope these enums will always be + * compatible */ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (check_buffer_capability(dev, opener, parm->type) < 0) + return -EINVAL; + parm->parm.capture = dev->capture_param; + return 0; +} + +/* set some data flow parameters; only capability, fps and readbuffers have + * an effect on this driver + * called on VIDIOC_S_PARM + */ +static int vidioc_s_parm(struct file *file, void *fh, + struct v4l2_streamparm *parm) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + + dprintk("S_PARM(frame-time=%u/%u)\n", + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator); + if (check_buffer_capability(dev, opener, parm->type) < 0) + return -EINVAL; + + switch (parm->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + set_timeperframe(dev, &parm->parm.capture.timeperframe); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + set_timeperframe(dev, &parm->parm.output.timeperframe); + break; + default: + return -EINVAL; + } + + parm->parm.capture = dev->capture_param; + return 0; +} + +#ifdef V4L2LOOPBACK_WITH_STD +/* sets a tv standard; actually we do not need to handle this in any special way + * added to support effecttv + * called on VIDIOC_S_STD + */ +static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std) +{ + v4l2_std_id req_std = 0, supported_std = 0; + const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; + + if (_std) { + req_std = *_std; + *_std = all_std; + } + + /* we support everything in V4L2_STD_ALL, but not more...
*/ + supported_std = (all_std & req_std); + if (no_std == supported_std) + return -EINVAL; + + return 0; +} + +/* gets a fake video standard + * called on VIDIOC_G_STD + */ +static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +/* gets a fake video standard + * called on VIDIOC_QUERYSTD + */ +static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +#endif /* V4L2LOOPBACK_WITH_STD */ + +static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, + s64 val) +{ + int result = 0; + switch (id) { + case CID_KEEP_FORMAT: + if (val < 0 || val > 1) + return -EINVAL; + dev->keep_format = val; + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + if (!dev->keep_format) { + if (has_no_owners(dev) && !any_buffers_mapped(dev)) + free_buffers(dev); + } + mutex_unlock(&dev->image_mutex); + break; + case CID_SUSTAIN_FRAMERATE: + if (val < 0 || val > 1) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->sustain_framerate = val; + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT: + if (val < 0 || val > MAX_TIMEOUT) + return -EINVAL; + if (val > 0) { + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + /* on-the-fly allocate if device is owned; else + * allocate occurs on next S_FMT or REQBUFS */ + if (!has_no_owners(dev)) + result = allocate_timeout_buffer(dev); + mutex_unlock(&dev->image_mutex); + if (result < 0) { + /* disable timeout as buffer not alloc'd */ + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = 0; + spin_unlock_bh(&dev->lock); + return result; + } + } + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = msecs_to_jiffies(val); + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT_IMAGE_IO: + dev->timeout_image_io = 1; + break; + default: + return -EINVAL; + } + return 0; +} + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct v4l2_loopback_device *dev = container_of( + ctrl->handler, struct v4l2_loopback_device, ctrl_handler); + return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); +} + +/* returns set of device outputs, in our case there is only one + * called on VIDIOC_ENUMOUTPUT + */ +static int vidioc_enum_output(struct file *file, void *fh, + struct v4l2_output *outp) +{ + __u32 index = outp->index; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + if (index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(outp, 0, sizeof(*outp)); + + outp->index = index; + strscpy(outp->name, "loopback in", sizeof(outp->name)); + outp->type = V4L2_OUTPUT_TYPE_ANALOG; + outp->audioset = 0; + outp->modulator = 0; +#ifdef V4L2LOOPBACK_WITH_STD + outp->std = V4L2_STD_ALL; +#ifdef V4L2_OUT_CAP_STD + outp->capabilities |= V4L2_OUT_CAP_STD; +#endif /* V4L2_OUT_CAP_STD */ +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which output is currently active, + * called on VIDIOC_G_OUTPUT + */ +static int vidioc_g_output(struct file *file, void *fh, unsigned int *index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; 
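+	/* there is only one (virtual) output, so the active output is always index 0 */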
+ if (index) + *index = 0; + return 0; +} + +/* set output, can make sense if we have more than one video src, + * called on VIDIOC_S_OUTPUT + */ +static int vidioc_s_output(struct file *file, void *fh, unsigned int index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + return index == 0 ? index : -EINVAL; +} + +/* returns set of device inputs, in our case there is only one, + * but later I may add more + * called on VIDIOC_ENUMINPUT + */ +static int vidioc_enum_input(struct file *file, void *fh, + struct v4l2_input *inp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + __u32 index = inp->index; + + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; + if (index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(inp, 0, sizeof(*inp)); + + inp->index = index; + strscpy(inp->name, "loopback", sizeof(inp->name)); + inp->type = V4L2_INPUT_TYPE_CAMERA; + inp->audioset = 0; + inp->tuner = 0; + inp->status = 0; + +#ifdef V4L2LOOPBACK_WITH_STD + inp->std = V4L2_STD_ALL; +#ifdef V4L2_IN_CAP_STD + inp->capabilities |= V4L2_IN_CAP_STD; +#endif +#endif /* V4L2LOOPBACK_WITH_STD */ + + if (has_output_token(dev->stream_tokens) && !dev->keep_format) + /* if no outputs attached; pretend device is powered off */ + inp->status |= V4L2_IN_ST_NO_SIGNAL; + + return 0; +} + +/* which input is currently active, + * called on VIDIOC_G_INPUT + */ +static int vidioc_g_input(struct file *file, void *fh, unsigned int *index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ + if (index) + *index = 0; + return 0; +} + +/* set input, can make sense if we have more than one video src, + * called on VIDIOC_S_INPUT + */ +static int vidioc_s_input(struct file *file, void *fh, unsigned int index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + if (index != 0) + return -EINVAL; + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ + return 0; +} + +/* --------------- V4L2 ioctl buffer related calls ----------------- */ + +#define is_allocated(opener, type, index) \ + (opener->format_token & (opener->io_method == V4L2L_IO_TIMEOUT ? 
\ + V4L2L_TOKEN_TIMEOUT : \ + token_from_type(type)) && \ + (index) < (opener)->buffer_count) +#define BUFFER_DEBUG_FMT_STR \ + "buffer#%u @ %p type=%u bytesused=%u length=%u flags=%x " \ + "field=%u timestamp= %lld.%06lld sequence=%u\n" +#define BUFFER_DEBUG_FMT_ARGS(buf) \ + (buf)->index, (buf), (buf)->type, (buf)->bytesused, (buf)->length, \ + (buf)->flags, (buf)->field, \ + (long long)(buf)->timestamp.tv_sec, \ + (long long)(buf)->timestamp.tv_usec, (buf)->sequence +/* Buffer flag helpers */ +#define unset_flags(flags) \ + do { \ + flags &= ~V4L2_BUF_FLAG_QUEUED; \ + flags &= ~V4L2_BUF_FLAG_DONE; \ + } while (0) +#define set_queued(flags) \ + do { \ + flags |= V4L2_BUF_FLAG_QUEUED; \ + flags &= ~V4L2_BUF_FLAG_DONE; \ + } while (0) +#define set_done(flags) \ + do { \ + flags &= ~V4L2_BUF_FLAG_QUEUED; \ + flags |= V4L2_BUF_FLAG_DONE; \ + } while (0) + +static bool any_buffers_mapped(struct v4l2_loopback_device *dev) +{ + u32 index; + for (index = 0; index < dev->buffer_count; ++index) + if (dev->buffers[index].buffer.flags & V4L2_BUF_FLAG_MAPPED) + return true; + return false; +} + +static void prepare_buffer_queue(struct v4l2_loopback_device *dev, int count) +{ + struct v4l2l_buffer *bufd, *n; + u32 pos; + + spin_lock_bh(&dev->list_lock); + + /* ensure sufficient number of buffers in queue */ + for (pos = 0; pos < count; ++pos) { + bufd = &dev->buffers[pos]; + if (list_empty(&bufd->list_head)) + list_add_tail(&bufd->list_head, &dev->outbufs_list); + } + if (list_empty(&dev->outbufs_list)) + goto exit_prepare_queue_unlock; + + /* remove any excess buffers */ + list_for_each_entry_safe(bufd, n, &dev->outbufs_list, list_head) { + if (bufd->buffer.index >= count) + list_del_init(&bufd->list_head); + } + + /* buffers are no longer queued; and `write_position` will correspond + * to the first item of `outbufs_list`. */ + pos = v4l2l_mod64(dev->write_position, count); + list_for_each_entry(bufd, &dev->outbufs_list, list_head) { + unset_flags(bufd->buffer.flags); + dev->bufpos2index[pos % count] = bufd->buffer.index; + ++pos; + } +exit_prepare_queue_unlock: + spin_unlock_bh(&dev->list_lock); +} + +/* forward declaration */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type); +/* negotiate buffer type + * only mmap streaming supported + * called on VIDIOC_REQBUFS + */ +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *reqbuf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + u32 token = opener->io_method == V4L2L_IO_TIMEOUT ?
+ V4L2L_TOKEN_TIMEOUT : + token_from_type(reqbuf->type); + u32 req_count = reqbuf->count; + int result = 0; + + dprintk("REQBUFS(memory=%u, req_count=%u) and device-bufs=%u/%u " + "[used/max]\n", + reqbuf->memory, req_count, dev->used_buffer_count, + dev->buffer_count); + + switch (reqbuf->memory) { + case V4L2_MEMORY_MMAP: +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) + reqbuf->capabilities = 0; /* only guarantee MMAP support */ +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + reqbuf->flags = 0; /* no memory consistency support */ +#endif + break; + default: + return -EINVAL; + } + + if (opener->format_token & ~token) + /* different (buffer) type already assigned to descriptor by + * S_FMT or REQBUFS */ + return -EINVAL; + + MARK(); + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; /* -EINTR */ + + /* CASE queue/dequeue timeout-buffer only: */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + opener->buffer_count = req_count; + if (req_count == 0) + release_token(dev, opener, format); + goto exit_reqbufs_unlock; + } + + MARK(); + /* CASE count is zero: streamoff, free buffers, release their token */ + if (req_count == 0) { + if (dev->format_tokens & token) { + acquire_token(dev, opener, format, token); + opener->io_method = V4L2L_IO_MMAP; + } + result = vidioc_streamoff(file, fh, reqbuf->type); + opener->buffer_count = 0; + /* undocumented requirement - REQBUFS with count zero should + * ALSO release lock on logical stream */ + if (opener->format_token) + release_token(dev, opener, format); + if (has_no_owners(dev)) + dev->used_buffer_count = 0; + goto exit_reqbufs_unlock; + } + + /* CASE count non-zero: allocate buffers and acquire token for them */ + MARK(); + switch (reqbuf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!(dev->format_tokens & token || + opener->format_token & token)) + /* only exclusive ownership for each stream */ + result = -EBUSY; + break; + default: + result = -EINVAL; + } + if (result < 0) + goto exit_reqbufs_unlock; + + if (has_other_owners(opener, dev) && dev->used_buffer_count > 0) { + /* allow 'allocation' of existing number of buffers */ + req_count = dev->used_buffer_count; + } else if (any_buffers_mapped(dev)) { + /* do not allow re-allocation if buffers are mapped */ + result = -EBUSY; + goto exit_reqbufs_unlock; + } + + MARK(); + opener->buffer_count = 0; + + if (req_count > dev->buffer_count) + req_count = dev->buffer_count; + + if (has_no_owners(dev)) { + result = allocate_buffers(dev, &dev->pix_format); + if (result < 0) + goto exit_reqbufs_unlock; + } + if (!dev->timeout_image && need_timeout_buffer(dev, token)) { + result = allocate_timeout_buffer(dev); + if (result < 0) + goto exit_reqbufs_unlock; + } + acquire_token(dev, opener, format, token); + + MARK(); + switch (opener->io_method) { + case V4L2L_IO_TIMEOUT: + dev->timeout_image_io = 0; + opener->buffer_count = req_count; + break; + default: + opener->io_method = V4L2L_IO_MMAP; + prepare_buffer_queue(dev, req_count); + dev->used_buffer_count = opener->buffer_count = req_count; + } +exit_reqbufs_unlock: + mutex_unlock(&dev->image_mutex); + reqbuf->count = opener->buffer_count; + return result; +} + +/* returns buffer asked for; + * give app as many buffers as it wants, if it less than MAX, + * but map them in our inner buffers + * called on VIDIOC_QUERYBUF + */ +static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = 
v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + u32 type = buf->type; + u32 index = buf->index; + + if ((type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && + (type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -EINVAL; + if (!is_allocated(opener, type, index)) + return -EINVAL; + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + *buf = dev->timeout_buffer.buffer; + buf->index = index; + } else + *buf = dev->buffers[index].buffer; + + buf->type = type; + + if (!(buf->flags & (V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_QUEUED))) { + /* v4l2-compliance requires these to be zero */ + buf->sequence = 0; + buf->timestamp.tv_sec = buf->timestamp.tv_usec = 0; + } else if (V4L2_TYPE_IS_CAPTURE(type)) { + /* guess flags based on sequence values */ + if (buf->sequence >= opener->read_position) { + set_done(buf->flags); + } else if (buf->flags & V4L2_BUF_FLAG_DONE) { + set_queued(buf->flags); + } + } + dprintkrw("QUERYBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, + V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, + BUFFER_DEBUG_FMT_ARGS(buf)); + return 0; +} + +static void buffer_written(struct v4l2_loopback_device *dev, + struct v4l2l_buffer *buf) +{ + timer_delete_sync(&dev->sustain_timer); + timer_delete_sync(&dev->timeout_timer); + + spin_lock_bh(&dev->list_lock); + list_move_tail(&buf->list_head, &dev->outbufs_list); + spin_unlock_bh(&dev->list_lock); + + spin_lock_bh(&dev->lock); + dev->bufpos2index[v4l2l_mod64(dev->write_position, + dev->used_buffer_count)] = + buf->buffer.index; + ++dev->write_position; + dev->reread_count = 0; + + check_timers(dev); + spin_unlock_bh(&dev->lock); +} + +/* put buffer to queue + * called on VIDIOC_QBUF + */ +static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + struct v4l2l_buffer *bufd; + u32 index = buf->index; + u32 type = buf->type; + + if (!is_allocated(opener, type, index)) + return -EINVAL; + bufd = &dev->buffers[index]; + + switch (buf->memory) { + case V4L2_MEMORY_MMAP: + if (!(bufd->buffer.flags & V4L2_BUF_FLAG_MAPPED)) + dprintkrw("QBUF() unmapped buffer [index=%u]\n", index); + break; + default: + return -EINVAL; + } + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + set_queued(buf->flags); + return 0; + } + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + dprintkrw("QBUF(CAPTURE, index=%u) -> " BUFFER_DEBUG_FMT_STR, + index, BUFFER_DEBUG_FMT_ARGS(buf)); + set_queued(buf->flags); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + dprintkrw("QBUF(OUTPUT, index=%u) -> " BUFFER_DEBUG_FMT_STR, + index, BUFFER_DEBUG_FMT_ARGS(buf)); + if (!(bufd->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY) && + (buf->timestamp.tv_sec == 0 && + buf->timestamp.tv_usec == 0)) { + v4l2l_get_timestamp(&bufd->buffer); + } else { + bufd->buffer.timestamp = buf->timestamp; + bufd->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY; + bufd->buffer.flags &= + ~V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + } + if (dev->pix_format_has_valid_sizeimage) { + if (buf->bytesused >= dev->pix_format.sizeimage) { + bufd->buffer.bytesused = + dev->pix_format.sizeimage; + } else { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + dev_warn_ratelimited( + &dev->vdev->dev, +#else + dprintkrw( +#endif + "warning queued output buffer bytesused too small %u < %u\n", + buf->bytesused, + dev->pix_format.sizeimage); + bufd->buffer.bytesused = buf->bytesused; + } + } else { + bufd->buffer.bytesused = 
buf->bytesused; + } + bufd->buffer.sequence = dev->write_position; + set_queued(bufd->buffer.flags); + *buf = bufd->buffer; + buffer_written(dev, bufd); + set_done(bufd->buffer.flags); + wake_up_all(&dev->read_event); + break; + default: + return -EINVAL; + } + buf->type = type; + return 0; +} + +static int can_read(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener) +{ + int ret; + + spin_lock_bh(&dev->lock); + check_timers(dev); + ret = dev->write_position > opener->read_position || + dev->reread_count > opener->reread_count || dev->timeout_happened; + spin_unlock_bh(&dev->lock); + return ret; +} + +static int get_capture_buffer(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = + v4l2l_f_to_opener(file, file->private_data); + int pos, timeout_happened; + u32 index; + + if ((file->f_flags & O_NONBLOCK) && + (dev->write_position <= opener->read_position && + dev->reread_count <= opener->reread_count && + !dev->timeout_happened)) + return -EAGAIN; + wait_event_interruptible(dev->read_event, can_read(dev, opener)); + + spin_lock_bh(&dev->lock); + if (dev->write_position == opener->read_position) { + if (dev->reread_count > opener->reread_count + 2) + opener->reread_count = dev->reread_count - 1; + ++opener->reread_count; + pos = v4l2l_mod64(opener->read_position + + dev->used_buffer_count - 1, + dev->used_buffer_count); + } else { + opener->reread_count = 0; + if (dev->write_position > + opener->read_position + dev->used_buffer_count) + opener->read_position = dev->write_position - 1; + pos = v4l2l_mod64(opener->read_position, + dev->used_buffer_count); + ++opener->read_position; + } + timeout_happened = dev->timeout_happened && (dev->timeout_jiffies > 0); + dev->timeout_happened = 0; + spin_unlock_bh(&dev->lock); + + index = dev->bufpos2index[pos]; + if (timeout_happened) { + if (index >= dev->used_buffer_count) { + dprintkrw("get_capture_buffer() read position is at " + "an unallocated buffer [index=%u]\n", + index); + return -EFAULT; + } + /* although allocated on-demand, timeout_image is freed only + * in free_buffers(), so we don't need to worry about it being + * deallocated suddenly */ + memcpy(dev->image + dev->buffers[index].buffer.m.offset, + dev->timeout_image, dev->buffer_size); + } + return (int)index; +} + +/* put buffer to dequeue + * called on VIDIOC_DQBUF + */ +static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + u32 type = buf->type; + int index; + struct v4l2l_buffer *bufd; + + if (buf->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + *buf = dev->timeout_buffer.buffer; + buf->type = type; + unset_flags(buf->flags); + return 0; + } + if ((opener->buffer_count == 0) || + !(opener->format_token & token_from_type(type))) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + index = get_capture_buffer(file); + if (index < 0) + return index; + *buf = dev->buffers[index].buffer; + unset_flags(buf->flags); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + spin_lock_bh(&dev->list_lock); + + bufd = list_first_entry_or_null(&dev->outbufs_list, + struct v4l2l_buffer, list_head); + if (bufd) + list_move_tail(&bufd->list_head, &dev->outbufs_list); + + spin_unlock_bh(&dev->list_lock); + if (!bufd) + return -EFAULT; + unset_flags(bufd->buffer.flags); + *buf = 
bufd->buffer; + break; + default: + return -EINVAL; + } + + buf->type = type; + dprintkrw("DQBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, + V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, + BUFFER_DEBUG_FMT_ARGS(buf)); + return 0; +} + +/* ------------- STREAMING ------------------- */ + +/* start streaming + * called on VIDIOC_STREAMON + */ +static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + u32 token = token_from_type(type); + + /* short-circuit when using timeout buffer set */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) + return 0; + /* opener must have claimed (same) buffer set via REQBUFS */ + if (!opener->buffer_count || !(opener->format_token & token)) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (has_output_token(dev->stream_tokens) && !dev->keep_format) + return -EIO; + if (dev->stream_tokens & token) { + acquire_token(dev, opener, stream, token); + client_usage_queue_event(dev->vdev); + } + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (dev->stream_tokens & token) + acquire_token(dev, opener, stream, token); + return 0; + default: + return -EINVAL; + } +} + +/* stop streaming + * called on VIDIOC_STREAMOFF + */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + u32 token = token_from_type(type); + + /* short-circuit when using timeout buffer set */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) + return 0; + /* short-circuit when buffer set has no owner */ + if (dev->format_tokens & token) + return 0; + /* opener needs a claim to buffer set */ + if (!opener->format_token) + return -EBUSY; + if (opener->format_token & ~token) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (opener->stream_token & token) + release_token(dev, opener, stream); + /* reset output queue */ + if (dev->used_buffer_count > 0) + prepare_buffer_queue(dev, dev->used_buffer_count); + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (opener->stream_token & token) { + release_token(dev, opener, stream); + client_usage_queue_event(dev->vdev); + } + return 0; + default: + return -EINVAL; + } +} + +#ifdef CONFIG_VIDEO_V4L1_COMPAT +static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + p->frames = dev->buffer_count; + p->offsets[0] = 0; + p->offsets[1] = 0; + p->size = dev->buffer_size; + return 0; +} +#endif + +static void client_usage_queue_event(struct video_device *vdev) +{ + struct v4l2_event ev; + struct v4l2_loopback_device *dev; + + dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device, + v4l2_dev); + + memset(&ev, 0, sizeof(ev)); + ev.type = V4L2_EVENT_PRI_CLIENT_USAGE; + ((struct v4l2_event_client_usage *)&ev.u)->count = + !has_capture_token(dev->stream_tokens); + + v4l2_event_queue(vdev, &ev); +} + +static int client_usage_ops_add(struct v4l2_subscribed_event *sev, + unsigned elems) +{ + if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) + return 0; + + client_usage_queue_event(sev->fh->vdev); + return 0; +} + +static void client_usage_ops_replace(struct v4l2_event *old, + const struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&old->u) = + 
*((struct v4l2_event_client_usage *)&new->u); +} + +static void client_usage_ops_merge(const struct v4l2_event *old, + struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&new->u) = + *((struct v4l2_event_client_usage *)&old->u); +} + +const struct v4l2_subscribed_event_ops client_usage_ops = { + .add = client_usage_ops_add, + .replace = client_usage_ops_replace, + .merge = client_usage_ops_merge, +}; + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_CTRL: + return v4l2_ctrl_subscribe_event(fh, sub); + case V4L2_EVENT_PRI_CLIENT_USAGE: + return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops); + } + + return -EINVAL; +} + +/* file operations */ +static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + atomic_inc(&buf->use_count); + buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + if (atomic_dec_and_test(&buf->use_count)) + buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + u8 *addr; + unsigned long start, size, offset; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = + v4l2l_f_to_opener(file, file->private_data); + struct v4l2l_buffer *buffer = NULL; + int result = 0; + MARK(); + + offset = (unsigned long)vma->vm_pgoff << PAGE_SHIFT; + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); /* always != 0 */ + + /* ensure buffer size, count, and allocated image(s) are not altered by + * other file descriptors */ + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + + if (size > dev->buffer_size) { + dprintk("mmap() attempt to map %lubytes when %ubytes are " + "allocated to buffers\n", + size, dev->buffer_size); + result = -EINVAL; + goto exit_mmap_unlock; + } + if (offset % dev->buffer_size != 0) { + dprintk("mmap() offset does not match start of any buffer\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + switch (opener->format_token) { + case V4L2L_TOKEN_TIMEOUT: + if (offset != (unsigned long)dev->buffer_size * MAX_BUFFERS) { + dprintk("mmap() incorrect offset for timeout image\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + buffer = &dev->timeout_buffer; + addr = dev->timeout_image; + break; + default: + if (offset >= dev->image_size) { + dprintk("mmap() attempt to map beyond all buffers\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + u32 index = offset / dev->buffer_size; + buffer = &dev->buffers[index]; + addr = dev->image + offset; + break; + } + + while (size > 0) { + struct page *page = vmalloc_to_page(addr); + + result = vm_insert_page(vma, start, page); + if (result < 0) + goto exit_mmap_unlock; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + + vm_open(vma); +exit_mmap_unlock: + mutex_unlock(&dev->image_mutex); + return result; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = + v4l2l_f_to_opener(file, file->private_data); + 
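+ /* note: which flags are reported below depends on the opener's role;
+ * OUTPUT and TIMEOUT openers poll as writable, while CAPTURE openers
+ * poll as readable once a frame (or a pending event) is available */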
__poll_t req_events = poll_requested_events(pts);
+ int ret_mask = 0;
+
+ /* call poll_wait in first call, regardless, to ensure that the
+ * wait-queue is not null */
+ poll_wait(file, &dev->read_event, pts);
+ poll_wait(file, &opener->fh.wait, pts);
+
+ if (req_events & POLLPRI) {
+ if (v4l2_event_pending(&opener->fh)) {
+ ret_mask |= POLLPRI;
+ if (!(req_events & DEFAULT_POLLMASK))
+ return ret_mask;
+ }
+ }
+
+ switch (opener->format_token) {
+ case V4L2L_TOKEN_OUTPUT:
+ if (opener->stream_token != 0 ||
+ opener->io_method == V4L2L_IO_NONE)
+ ret_mask |= POLLOUT | POLLWRNORM;
+ break;
+ case V4L2L_TOKEN_CAPTURE:
+ if ((opener->io_method == V4L2L_IO_NONE ||
+ opener->stream_token != 0) &&
+ can_read(dev, opener))
+ ret_mask |= POLLIN | POLLRDNORM;
+ break;
+ case V4L2L_TOKEN_TIMEOUT:
+ ret_mask |= POLLOUT | POLLWRNORM;
+ break;
+ default:
+ break;
+ }
+
+ return ret_mask;
+}
+
+/* do not want to limit device opens; there can be as many readers as the
+ * user wants, while writers are limited by means of the writer field */
+static int v4l2_loopback_open(struct file *file)
+{
+ struct v4l2_loopback_device *dev;
+ struct v4l2_loopback_opener *opener;
+
+ dev = v4l2loopback_getdevice(file);
+ if (dev->open_count.counter >= dev->max_openers)
+ return -EBUSY;
+ /* kfree on close */
+ opener = kzalloc(sizeof(*opener), GFP_KERNEL);
+ if (opener == NULL)
+ return -ENOMEM;
+
+ atomic_inc(&dev->open_count);
+ if (dev->timeout_image_io && dev->format_tokens & V4L2L_TOKEN_TIMEOUT)
+ /* will clear timeout_image_io once buffer set acquired */
+ opener->io_method = V4L2L_IO_TIMEOUT;
+
+ v4l2_fh_init(&opener->fh, video_devdata(file));
+ file->private_data = &opener->fh;
+
+ v4l2_fh_add(&opener->fh, file);
+ dprintk("open() -> dev@%p with image@%p\n", dev,
+ dev ? dev->image : NULL);
+ return 0;
+}
+
+static int v4l2_loopback_close(struct file *file)
+{
+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+ struct v4l2_loopback_opener *opener =
+ v4l2l_f_to_opener(file, file->private_data);
+ int result = 0;
+ dprintk("close() -> dev@%p with image@%p\n", dev,
+ dev ?
dev->image : NULL); + + if (opener->format_token) { + struct v4l2_requestbuffers reqbuf = { + .count = 0, .memory = V4L2_MEMORY_MMAP, .type = 0 + }; + switch (opener->format_token) { + case V4L2L_TOKEN_CAPTURE: + reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + break; + case V4L2L_TOKEN_OUTPUT: + case V4L2L_TOKEN_TIMEOUT: + reqbuf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + break; + } + if (reqbuf.type) + result = vidioc_reqbufs(file, file->private_data, + &reqbuf); + if (result < 0) + dprintk("failed to free buffers REQBUFS(count=0) " + " returned %d\n", + result); + mutex_lock(&dev->image_mutex); + release_token(dev, opener, format); + mutex_unlock(&dev->image_mutex); + } + + if (atomic_dec_and_test(&dev->open_count)) { + timer_delete_sync(&dev->sustain_timer); + timer_delete_sync(&dev->timeout_timer); + if (!dev->keep_format) { + mutex_lock(&dev->image_mutex); + free_buffers(dev); + mutex_unlock(&dev->image_mutex); + } + } + + v4l2_fh_del(&opener->fh, file); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + return 0; +} + +static int start_fileio(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = v4l2l_f_to_opener(file, fh); + struct v4l2_requestbuffers reqbuf = { .count = dev->buffer_count, + .memory = V4L2_MEMORY_MMAP, + .type = type }; + int token = token_from_type(type); + int result; + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT || + opener->format_token & ~token) + return -EBUSY; /* NOTE: -EBADF might be more informative */ + + /* short-circuit if already have stream token */ + if (opener->stream_token && opener->io_method == V4L2L_IO_FILE) + return 0; + + /* otherwise attempt to acquire stream token and assign IO method */ + if (!(dev->stream_tokens & token) || opener->io_method != V4L2L_IO_NONE) + return -EBUSY; + + result = vidioc_reqbufs(file, fh, &reqbuf); + if (result < 0) + return result; + result = vidioc_streamon(file, fh, type); + if (result < 0) + return result; + + opener->io_method = V4L2L_IO_FILE; + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_buffer *b; + int index, result; + + dprintkrw("read() %zu bytes\n", count); + result = start_fileio(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_CAPTURE); + if (result < 0) + return result; + + index = get_capture_buffer(file); + if (index < 0) + return index; + b = &dev->buffers[index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), + count)) { + printk(KERN_ERR "v4l2-loopback read() failed copy_to_user()\n"); + return -EFAULT; + } + return count; +} + +static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_buffer *b; + int index, result; + + dprintkrw("write() %zu bytes\n", count); + result = start_fileio(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_OUTPUT); + if (result < 0) + return result; + + if (count > dev->buffer_size) + count = dev->buffer_size; + index = v4l2l_mod64(dev->write_position, dev->used_buffer_count); + b = &dev->buffers[index].buffer; + + if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, + count)) { + printk(KERN_ERR + "v4l2-loopback write() failed copy_from_user()\n"); + return -EFAULT; + } + b->bytesused = 
count; + + v4l2l_get_timestamp(b); + b->sequence = dev->write_position; + set_queued(b->flags); + buffer_written(dev, &dev->buffers[index]); + set_done(b->flags); + wake_up_all(&dev->read_event); + + return count; +} + +/* init functions */ +/* frees buffers, if allocated */ +static void free_buffers(struct v4l2_loopback_device *dev) +{ + dprintk("free_buffers() with image@%p\n", dev->image); + if (!dev->image) + return; + if (!has_no_owners(dev) || any_buffers_mapped(dev)) + /* maybe an opener snuck in before image_mutex was acquired */ + printk(KERN_WARNING + "v4l2-loopback free_buffers() buffers of video device " + "#%u freed while still mapped to userspace\n", + dev->vdev->num); + vfree(dev->image); + dev->image = NULL; + dev->image_size = 0; + dev->buffer_size = 0; +} + +static void free_timeout_buffer(struct v4l2_loopback_device *dev) +{ + dprintk("free_timeout_buffer() with timeout_image@%p\n", + dev->timeout_image); + if (!dev->timeout_image) + return; + + if ((dev->timeout_jiffies > 0 && !has_no_owners(dev)) || + dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) + printk(KERN_WARNING + "v4l2-loopback free_timeout_buffer() timeout image " + "of device #%u freed while still mapped to userspace\n", + dev->vdev->num); + + vfree(dev->timeout_image); + dev->timeout_image = NULL; + dev->timeout_buffer_size = 0; +} +/* allocates buffers if no (other) openers are already using them */ +static int allocate_buffers(struct v4l2_loopback_device *dev, + struct v4l2_pix_format *pix_format) +{ + u32 buffer_size = PAGE_ALIGN(pix_format->sizeimage); + unsigned long image_size = + (unsigned long)buffer_size * (unsigned long)dev->buffer_count; + /* vfree on close file operation in case no open handles left */ + + if (buffer_size == 0 || dev->buffer_count == 0 || + buffer_size < pix_format->sizeimage) + return -EINVAL; + + if ((__LONG_MAX__ / buffer_size) < dev->buffer_count) + return -ENOSPC; + + dprintk("allocate_buffers() size %lubytes = %ubytes x %ubuffers\n", + image_size, buffer_size, dev->buffer_count); + if (dev->image) { + /* check that no buffers are expected in user-space */ + if (!has_no_owners(dev) || any_buffers_mapped(dev)) + return -EBUSY; + dprintk("allocate_buffers() existing size=%lubytes\n", + dev->image_size); + /* FIXME: prevent double allocation more intelligently! 
*/ + if (image_size == dev->image_size) { + dprintk("allocate_buffers() keep existing\n"); + return 0; + } + free_buffers(dev); + } + + /* FIXME: set buffers to 0 */ + dev->image = vmalloc(image_size); + if (dev->image == NULL) { + dev->buffer_size = dev->image_size = 0; + return -ENOMEM; + } + init_buffers(dev, pix_format->sizeimage, buffer_size); + dev->buffer_size = buffer_size; + dev->image_size = image_size; + dprintk("allocate_buffers() -> vmalloc'd %lubytes\n", dev->image_size); + return 0; +} +static int allocate_timeout_buffer(struct v4l2_loopback_device *dev) +{ + /* device's `buffer_size` and `buffers` must be initialised in + * allocate_buffers() */ + + dprintk("allocate_timeout_buffer() size %ubytes\n", dev->buffer_size); + if (dev->buffer_size == 0) + return -EINVAL; + + if (dev->timeout_image) { + if (dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) + return -EBUSY; + if (dev->buffer_size == dev->timeout_buffer_size) + return 0; + free_timeout_buffer(dev); + } + + dev->timeout_image = vzalloc(dev->buffer_size); + if (!dev->timeout_image) { + dev->timeout_buffer_size = 0; + return -ENOMEM; + } + dev->timeout_buffer_size = dev->buffer_size; + return 0; +} +/* init inner buffers, they are capture mode and flags are set as for capture + * mode buffers */ +static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, + u32 buffer_size) +{ + u32 i; + + for (i = 0; i < dev->buffer_count; ++i) { + struct v4l2_buffer *b = &dev->buffers[i].buffer; + b->index = i; + b->bytesused = bytes_used; + b->length = buffer_size; + b->field = V4L2_FIELD_NONE; + b->flags = 0; + b->m.offset = i * buffer_size; + b->memory = V4L2_MEMORY_MMAP; + b->sequence = 0; + b->timestamp.tv_sec = 0; + b->timestamp.tv_usec = 0; + b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + v4l2l_get_timestamp(b); + } + dev->timeout_buffer = dev->buffers[0]; + dev->timeout_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; +} + +/* fills and register video device */ +static void init_vdev(struct video_device *vdev, int nr) +{ +#ifdef V4L2LOOPBACK_WITH_STD + vdev->tvnorms = V4L2_STD_ALL; +#endif /* V4L2LOOPBACK_WITH_STD */ + + vdev->vfl_type = VFL_TYPE_VIDEO; + vdev->fops = &v4l2_loopback_fops; + vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; + vdev->release = &video_device_release; + vdev->minor = -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE | + V4L2_CAP_STREAMING; +#endif + + if (debug > 1) + vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL | + V4L2_DEV_DEBUG_IOCTL_ARG; + + vdev->vfl_dir = VFL_DIR_M2M; +} + +/* init default capture parameters, only fps may be changed in future */ +static void init_capture_param(struct v4l2_captureparm *capture_param) +{ + capture_param->capability = V4L2_CAP_TIMEPERFRAME; /* since 2.16 */ + capture_param->capturemode = 0; + capture_param->extendedmode = 0; + capture_param->readbuffers = max_buffers; + capture_param->timeperframe.numerator = 1; + capture_param->timeperframe.denominator = V4L2LOOPBACK_FPS_DEFAULT; +} + +static void check_timers(struct v4l2_loopback_device *dev) +{ + if (has_output_token(dev->stream_tokens)) + return; + + if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies * 3 / 2); +} +#ifdef HAVE_TIMER_SETUP +static void 
sustain_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = + container_of(t, struct v4l2_loopback_device, sustain_timer); +#else +static void sustain_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->sustain_framerate) { + dev->reread_count++; + dprintkrw("sustain_timer_clb() write_pos=%lld reread=%u\n", + (long long)dev->write_position, dev->reread_count); + if (dev->reread_count == 1) + mod_timer(&dev->sustain_timer, + jiffies + max(1UL, dev->frame_jiffies / 2)); + else + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} +#ifdef HAVE_TIMER_SETUP +static void timeout_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = + container_of(t, struct v4l2_loopback_device, timeout_timer); +#else +static void timeout_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->timeout_jiffies > 0) { + dev->timeout_happened = 1; + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} + +/* init loopback main structure */ +#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + struct v4l2loopback_private *vdev_priv = NULL; + int err; + + u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + + u32 _min_width = DEFAULT_FROM_CONF(min_width, + < V4L2LOOPBACK_SIZE_MIN_WIDTH, + V4L2LOOPBACK_SIZE_MIN_WIDTH); + u32 _min_height = DEFAULT_FROM_CONF(min_height, + < V4L2LOOPBACK_SIZE_MIN_HEIGHT, + V4L2LOOPBACK_SIZE_MIN_HEIGHT); + u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width); + u32 _max_height = + DEFAULT_FROM_CONF(max_height, < _min_height, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? 
+ (bool)(conf->announce_all_caps) : + !(V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS); + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + struct v4l2_format _fmt; + + int nr = -1; + + if (conf) { + const int output_nr = conf->output_nr; +#ifdef SPLIT_DEVICES + const int capture_nr = conf->capture_nr; +#else + const int capture_nr = output_nr; +#endif + if (capture_nr >= 0 && output_nr == capture_nr) { + nr = output_nr; + } else if (capture_nr < 0 && output_nr < 0) { + nr = -1; + } else if (capture_nr < 0) { + nr = output_nr; + } else if (output_nr < 0) { + nr = capture_nr; + } else { + printk(KERN_ERR + "v4l2-loopback add() split OUTPUT and CAPTURE " + "devices not yet supported.\n"); + printk(KERN_INFO + "v4l2-loopback add() both devices must have the " + "same number (%d != %d).\n", + output_nr, capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + /* initialisation of a new device */ + dprintk("add() creating device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + + /* register new device */ + MARK(); + nr = err; + + if (conf && conf->card_label[0]) { + snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + + /* initialise the _video_ device */ + MARK(); + err = -ENOMEM; + dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) + goto out_unregister; + + vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL); + if (vdev_priv == NULL) + goto out_unregister; + + video_set_drvdata(dev->vdev, vdev_priv); + if (video_get_drvdata(dev->vdev) == NULL) + goto out_unregister; + + snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s", + dev->card_label); + vdev_priv->device_nr = nr; + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + + /* initialise v4l2-loopback specific parameters */ + MARK(); + dev->announce_all_caps = _announce_all_caps; + dev->min_width = _min_width; + dev->min_height = _min_height; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + + /* set (initial) pixel and stream format */ + _width = clamp_val(_width, _min_width, _max_width); + _height = clamp_val(_height, _min_height, _max_height); + _fmt = (struct v4l2_format){ + .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, + .fmt.pix = { .width = _width, + .height = _height, + .pixelformat = formats[0].fourcc, + .colorspace = V4L2_COLORSPACE_DEFAULT, + .field = V4L2_FIELD_NONE } + }; + + err = v4l2l_fill_format(&_fmt, _min_width, _max_width, _min_height, + _max_height); + if (err) + /* highly unexpected failure to assign default format */ + goto out_unregister; + dev->pix_format = _fmt.fmt.pix; + init_capture_param(&dev->capture_param); + set_timeperframe(dev, &dev->capture_param.timeperframe); + + /* ctrls parameters */ + dev->keep_format = 0; + dev->sustain_framerate = 
0; + dev->timeout_jiffies = 0; + dev->timeout_image_io = 0; + + /* initialise OUTPUT and CAPTURE buffer values */ + dev->image = NULL; + dev->image_size = 0; + dev->buffer_count = _max_buffers; + dev->buffer_size = 0; + dev->used_buffer_count = 0; + INIT_LIST_HEAD(&dev->outbufs_list); + do { + u32 index; + for (index = 0; index < dev->buffer_count; ++index) + INIT_LIST_HEAD(&dev->buffers[index].list_head); + + } while (0); + memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + dev->write_position = 0; + + /* initialise synchronisation data */ + atomic_set(&dev->open_count, 0); + mutex_init(&dev->image_mutex); + spin_lock_init(&dev->lock); + spin_lock_init(&dev->list_lock); + init_waitqueue_head(&dev->read_event); + dev->format_tokens = V4L2L_TOKEN_MASK; + dev->stream_tokens = V4L2L_TOKEN_MASK; + + /* initialise sustain frame rate and timeout parameters, and timers */ + dev->reread_count = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + + /* initialise the control handler and add controls */ + MARK(); + hdl = &dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto out_free_handler; + + /* register the device (creates /dev/video*) */ + MARK(); + if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { + printk(KERN_ERR + "v4l2-loopback add() failed video_register_device()\n"); + err = -EFAULT; + goto out_free_device; + } + v4l2loopback_create_sysfs(dev->vdev); + /* NOTE: ambivalent if sysfs entries fail */ + + if (ret_nr) + *ret_nr = dev->vdev->num; + return 0; + +out_free_device: +out_free_handler: + v4l2_ctrl_handler_free(&dev->ctrl_handler); +out_unregister: + if (dev->vdev) + video_set_drvdata(dev->vdev, NULL); + video_device_release(dev->vdev); + if (vdev_priv != NULL) + kfree(vdev_priv); + v4l2_device_unregister(&dev->v4l2_dev); +out_free_idr: + idr_remove(&v4l2loopback_index_idr, nr); +out_free_dev: + kfree(dev); + return err; +} + +static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) +{ + int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); + mutex_lock(&dev->image_mutex); + free_buffers(dev); + free_timeout_buffer(dev); + mutex_unlock(&dev->image_mutex); + v4l2loopback_remove_sysfs(dev->vdev); + v4l2_ctrl_handler_free(&dev->ctrl_handler); + kfree(video_get_drvdata(dev->vdev)); + video_unregister_device(dev->vdev); + v4l2_device_unregister(&dev->v4l2_dev); + idr_remove(&v4l2loopback_index_idr, device_nr); + kfree(dev); +} + +static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_config conf; + struct v4l2_loopback_config *confptr = &conf; + int device_nr, capture_nr, output_nr; + int ret; + const __u32 version = V4L2LOOPBACK_VERSION_CODE; + + ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); + if (ret) + 
return ret; + + ret = -EINVAL; + switch (cmd) { + default: + ret = -ENOSYS; + break; + /* add a v4l2loopback device (pair), based on the user-provided specs */ + case V4L2LOOPBACK_CTL_ADD: + case V4L2LOOPBACK_CTL_ADD_legacy: + if (parm) { + if ((ret = copy_from_user(&conf, (void *)parm, + sizeof(conf))) < 0) + break; + } else + confptr = NULL; + ret = v4l2_loopback_add(confptr, &device_nr); + if (ret >= 0) + ret = device_nr; + break; + /* remove a v4l2loopback device (both capture and output) */ + case V4L2LOOPBACK_CTL_REMOVE: + case V4L2LOOPBACK_CTL_REMOVE_legacy: + ret = v4l2loopback_lookup((__u32)parm, &dev); + if (ret >= 0 && dev) { + ret = -EBUSY; + if (dev->open_count.counter > 0) + break; + v4l2_loopback_remove(dev); + ret = 0; + }; + break; + /* get information for a loopback device. + * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends + */ + case V4L2LOOPBACK_CTL_QUERY: + case V4L2LOOPBACK_CTL_QUERY_legacy: + if (!parm) + break; + if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < + 0) + break; + capture_nr = output_nr = conf.output_nr; +#ifdef SPLIT_DEVICES + capture_nr = conf.capture_nr; +#endif + device_nr = (output_nr < 0) ? capture_nr : output_nr; + MARK(); + /* get the device from either capture_nr or output_nr (whatever is valid) */ + if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) + break; + MARK(); + /* if we got the device from output_nr and there is a valid capture_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != capture_nr) && (capture_nr >= 0) && + ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0)) + break; + MARK(); + /* if otoh, we got the device from capture_nr and there is a valid output_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != output_nr) && (output_nr >= 0) && + ((ret = v4l2loopback_lookup(output_nr, 0)) < 0)) + break; + + /* v4l2_loopback_config identified a single device, so fetch the data */ + snprintf(conf.card_label, sizeof(conf.card_label), "%s", + dev->card_label); + + conf.output_nr = dev->vdev->num; +#ifdef SPLIT_DEVICES + conf.capture_nr = dev->vdev->num; +#endif + conf.min_width = dev->min_width; + conf.min_height = dev->min_height; + conf.max_width = dev->max_width; + conf.max_height = dev->max_height; + conf.announce_all_caps = dev->announce_all_caps; + conf.max_buffers = dev->buffer_count; + conf.max_openers = dev->max_openers; + conf.debug = debug; + MARK(); + if (copy_to_user((void *)parm, &conf, sizeof(conf))) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case V4L2LOOPBACK_CTL_VERSION: + if (!parm) + break; + if (copy_to_user((void *)parm, &version, sizeof(version))) { + ret = -EFAULT; + break; + } + ret = 0; + break; + } + + mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write 
= v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std = &vidioc_s_std, + .vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; + const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2-loopback init() number of initial devices is " + "limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2-loopback init() number of buffers is limited " + "to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2-loopback init() allowing %d openers rather " + "than %d\n", + 2, max_openers); + max_openers = 2; + } + + if (max_width < min_width) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO "v4l2-loopback init() using max_width %d\n", + max_width); + } + if (max_height < min_height) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2-loopback init() using max_height %d\n", + max_height); + } + + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = 
video_nr[i], +#ifdef SPLIT_DEVICES + .capture_nr = video_nr[i], +#endif + .min_width = min_width, + .min_height = min_height, + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2-loopback driver version %d.%d.%d%s loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff, +#ifdef SNAPSHOT_VERSION + " (" __stringify(SNAPSHOT_VERSION) ")" +#else + "" +#endif + ); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +module_init(v4l2loopback_init_module); +module_exit(v4l2loopback_cleanup_module); diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 000000000000..8961a98eff58 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 15 +#define V4L2LOOPBACK_VERSION_BUGFIX 3 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * NOTE: currently split-devices (where output_nr and capture_nr differ) + * are not implemented yet. + * until then, requesting different device-IDs will result in EINVAL. 
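+ *
+ * A minimal userspace sketch for V4L2LOOPBACK_CTL_ADD (illustrative
+ * only; error handling omitted, <fcntl.h> and <sys/ioctl.h> assumed):
+ *
+ *   struct v4l2_loopback_config cfg = { .output_nr = -1 };
+ *   int ctl = open("/dev/v4l2loopback", O_RDWR);
+ *   int nr = ioctl(ctl, V4L2LOOPBACK_CTL_ADD, &cfg);
+ *   (nr >= 0 is the number of the new device, i.e. /dev/video<nr>)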
+ * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + __s32 output_nr; + __s32 unused; /*capture_nr;*/ + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * allowed frame size + * if too low, default values are used + */ + __u32 min_width; + __u32 max_width; + __u32 min_height; + __u32 max_height; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + __s32 max_buffers; + + /** + * how many consumers are allowed to open this device concurrently + * if set to <=0, default values are used + */ + __s32 max_openers; + + /** + * set the debugging level for this device + */ + __s32 debug; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * NOTE: this is going to be removed once separate output/capture + * devices are implemented + */ + __s32 announce_all_caps; +}; + +#define V4L2LOOPBACK_CTL_IOCTLMAGIC '~' + +/* a pointer to an (unsigned int) that - on success - will hold + * the version code of the v4l2loopback module + * as returned by KERNEL_VERSION(MAJOR, MINOR, BUGFIX) + */ +#define V4L2LOOPBACK_CTL_VERSION _IOR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 0, __u32) + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. + * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD \ + _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 1, struct v4l2_loopback_config) + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 2, __u32) + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY \ + _IOWR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 3, struct v4l2_loopback_config) + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 000000000000..d855a3796554 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,445 @@ +static const struct v4l2l_format formats[] = { +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + + /* here come the packed formats */ + { + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, + }, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags = 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_ABGR32 + { + .name = "32 bpp RGBA, le", + .fourcc = V4L2_PIX_FMT_ABGR32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGBA32 + { + .name = "32 bpp RGBA", + .fourcc = V4L2_PIX_FMT_RGBA32, + .depth = 32, + .flags = 0, + }, 
+#endif +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = "YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { + .name = "Bayer GBRG 8bit", + 
.fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + .fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef 
V4L2_PIX_FMT_XVID
+	{
+		.name = "Xvid",
+		.fourcc = V4L2_PIX_FMT_XVID,
+		.depth = 32,
+		.flags = FORMAT_FLAGS_COMPRESSED,
+	},
+#endif /* V4L2_PIX_FMT_XVID */
+#ifdef V4L2_PIX_FMT_VC1_ANNEX_G
+	{
+		.name = "SMPTE 421M Annex G compliant stream",
+		.fourcc = V4L2_PIX_FMT_VC1_ANNEX_G,
+		.depth = 32,
+		.flags = FORMAT_FLAGS_COMPRESSED,
+	},
+#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */
+#ifdef V4L2_PIX_FMT_VC1_ANNEX_L
+	{
+		.name = "SMPTE 421M Annex L compliant stream",
+		.fourcc = V4L2_PIX_FMT_VC1_ANNEX_L,
+		.depth = 32,
+		.flags = FORMAT_FLAGS_COMPRESSED,
+	},
+#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */
+#ifdef V4L2_PIX_FMT_VP8
+	{
+		.name = "VP8",
+		.fourcc = V4L2_PIX_FMT_VP8,
+		.depth = 32,
+		.flags = FORMAT_FLAGS_COMPRESSED,
+	},
+#endif /* V4L2_PIX_FMT_VP8 */
+#ifdef V4L2_PIX_FMT_VP9
+	{
+		.name = "VP9",
+		.fourcc = V4L2_PIX_FMT_VP9,
+		.depth = 32,
+		.flags = FORMAT_FLAGS_COMPRESSED,
+	},
+#endif /* V4L2_PIX_FMT_VP9 */
+#ifdef V4L2_PIX_FMT_HEVC
+	{
+		.name = "HEVC",
+		.fourcc = V4L2_PIX_FMT_HEVC,
+		.depth = 32,
+		.flags = FORMAT_FLAGS_COMPRESSED,
+	},
+#endif /* V4L2_PIX_FMT_HEVC */
+};
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index 229929a945c2..79811c68a278 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -1,4 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
+ifdef CONFIG_X86_64
+ifdef CONFIG_SATA_AHCI
+obj-y += intel-nvme-remap.o
+endif
+endif
+
 obj-$(CONFIG_PCIE_CADENCE) += cadence/
 obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
 obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o
diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c
new file mode 100644
index 000000000000..e105e6f5cc91
--- /dev/null
+++ b/drivers/pci/controller/intel-nvme-remap.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel remapped NVMe device support.
+ *
+ * Copyright (c) 2019 Endless Mobile, Inc.
+ * Author: Daniel Drake
+ *
+ * Some products ship by default with the SATA controller in "RAID" or
+ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this
+ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe
+ * devices disappear from the PCI bus, and instead their I/O memory becomes
+ * available within the AHCI device BARs.
+ *
+ * This scheme is understood to be a way of avoiding usage of the standard
+ * Windows NVMe driver under that OS, instead mandating usage of Intel's
+ * own driver, which has better power management, and presumably offers
+ * some RAID/disk-caching solutions too.
+ *
+ * Here in this driver, we support the remapped NVMe mode by claiming the
+ * AHCI device and creating a fake PCIe root port. On the new bus, the
+ * original AHCI device is exposed with only minor tweaks. Then, fake PCI
+ * devices corresponding to the remapped NVMe devices are created. The usual
+ * ahci and nvme drivers are then expected to bind to these devices and
+ * operate as normal.
+ *
+ * The PCI configuration space for the NVMe devices is completely
+ * unavailable, so we fake a minimal one and hope for the best.
+ *
+ * Interrupts are shared between the AHCI and NVMe devices. For simplicity,
+ * we only support the legacy interrupt here, although MSI support
+ * could potentially be added later.
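+ *
+ * The resulting topology is roughly:
+ *
+ *   fake PCIe root bus (in a fresh PCI domain)
+ *    |- slot 00.0: the original AHCI device (config space passed through)
+ *    |- slot 01.0 .. nn.0: one fake device per remapped NVMe drive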
+ */
+
+#define MODULE_NAME "intel-nvme-remap"
+
+#include <linux/ahci-remap.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#define AHCI_PCI_BAR_STANDARD 5
+
+struct nvme_remap_dev {
+	struct pci_dev *dev;		/* AHCI device */
+	struct pci_bus *bus;		/* our fake PCI bus */
+	struct pci_sysdata sysdata;
+	int irq_base;			/* our fake interrupts */
+
+	/*
+	 * When we detect an all-ones write to a BAR register, this flag
+	 * is set, so that we return the BAR size on the next read (a
+	 * standard PCI behaviour).
+	 * This includes the assumption that an all-ones BAR write is
+	 * immediately followed by a read of the same register.
+	 */
+	bool bar_sizing;
+
+	/*
+	 * Resources copied from the AHCI device, to be regarded as
+	 * resources on our fake bus.
+	 */
+	struct resource ahci_resources[PCI_NUM_RESOURCES];
+
+	/* Resources corresponding to the NVMe devices. */
+	struct resource remapped_dev_mem[AHCI_MAX_REMAP];
+
+	/* Number of remapped NVMe devices found. */
+	int num_remapped_devices;
+};
+
+static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
+{
+	return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
+}
+
+
+/******** PCI configuration space **********/
+
+/*
+ * Helper macros for tweaking returned contents of PCI configuration space.
+ *
+ * value contains len bytes of data read from reg.
+ * If fixup_reg is included in that range, fix up the contents of that
+ * register to fixed_value.
+ */
+#define NR_FIX8(fixup_reg, fixed_value) do { \
+		if (reg <= fixup_reg && fixup_reg < reg + len) \
+			((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \
+	} while (0)
+
+#define NR_FIX16(fixup_reg, fixed_value) do { \
+		NR_FIX8(fixup_reg, fixed_value); \
+		NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
+	} while (0)
+
+#define NR_FIX24(fixup_reg, fixed_value) do { \
+		NR_FIX8(fixup_reg, fixed_value); \
+		NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
+		NR_FIX8(fixup_reg + 2, fixed_value >> 16); \
+	} while (0)

+#define NR_FIX32(fixup_reg, fixed_value) do { \
+		NR_FIX16(fixup_reg, (u16) fixed_value); \
+		NR_FIX16(fixup_reg + 2, fixed_value >> 16); \
+	} while (0)
+
+/*
+ * Read PCI config space of the slot 0 (AHCI) device.
+ * We pass through the read request to the underlying device, but
+ * tweak the results in some cases.
+ */
+static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
+				     int len, u32 *value)
+{
+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
+	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
+	int ret;
+
+	ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
+				      reg, len, value);
+	if (ret)
+		return ret;
+
+	/*
+	 * Adjust the device class, to prevent this driver from attempting to
+	 * additionally probe the device we're simulating here.
+	 */
+	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
+
+	/*
+	 * Unset interrupt pin, otherwise ACPI tries to find routing
+	 * info for our virtual IRQ, fails, and complains.
+	 */
+	NR_FIX8(PCI_INTERRUPT_PIN, 0);
+
+	/*
+	 * Truncate the AHCI BAR to not include the region that covers the
+	 * hidden devices. This will cause the ahci driver to successfully
+	 * probe the new device (instead of handing it over to this driver).
+	 */
+	if (nrdev->bar_sizing) {
+		NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
+		nrdev->bar_sizing = false;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Read PCI config space of a remapped device.
+ * Since the original PCI config space is inaccessible, we provide a minimal,
+ * fake config space instead.
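+ *
+ * Only the handful of fields that the PCI core and the nvme driver are
+ * known to consult are synthesized below: the vendor ID, the command
+ * register, the class code, and a 64-bit memory BAR pointing at the
+ * device's remapped MMIO window.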
+ */ +static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port, + int reg, int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct resource *remapped_mem; + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + *value = 0; + remapped_mem = &nrdev->remapped_dev_mem[port - 1]; + + /* Set a Vendor ID, otherwise Linux assumes no device is present */ + NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL); + + /* Always appear on & bus mastering */ + NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + /* Set class so that nvme driver probes us */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS); + + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_0, + ~(resource_size(remapped_mem) - 1)); + nrdev->bar_sizing = false; + } else { + resource_size_t mem_start = remapped_mem->start; + + mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64; + NR_FIX32(PCI_BASE_ADDRESS_0, mem_start); + mem_start >>= 32; + NR_FIX32(PCI_BASE_ADDRESS_1, mem_start); + } + + return PCIBIOS_SUCCESSFUL; +} + +/* Read PCI configuration space. */ +static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_read_slot0(bus, reg, len, value); + else + return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +/* + * Write PCI config space of the slot 0 (AHCI) device. + * Apart from the special case of BAR sizing, we disable all writes. + * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master) + * that would affect the operation of the NVMe devices. + */ +static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg, + int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + + if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) { + /* + * Writing all-ones to a BAR means that the size of the + * memory region is being checked. Flag this so that we can + * reply with an appropriate size on the next read. + */ + if (value == ~0) + nrdev->bar_sizing = true; + + return ahci_dev_bus->ops->write(ahci_dev_bus, + nrdev->dev->devfn, + reg, len, value); + } + + return PCIBIOS_SET_FAILED; +} + +/* + * Write PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we reject all + * writes, except for the special case of BAR probing. + */ +static int nvme_remap_pci_write_remapped(struct pci_bus *bus, + unsigned int port, + int reg, int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Writing all-ones to a BAR means that the size of the memory + * region is being checked. Flag this so that we can reply with + * an appropriate size on the next read. + */ + if (value == ~0 && reg >= PCI_BASE_ADDRESS_0 + && reg <= PCI_BASE_ADDRESS_5) { + nrdev->bar_sizing = true; + return PCIBIOS_SUCCESSFUL; + } + + return PCIBIOS_SET_FAILED; +} + +/* Write PCI configuration space. 
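+ * Slot 0 is the passed-through AHCI device; any other slot number selects
+ * one of the synthesized remapped NVMe devices.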
*/ +static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_write_slot0(bus, reg, len, value); + else + return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +static struct pci_ops nvme_remap_pci_ops = { + .read = nvme_remap_pci_read, + .write = nvme_remap_pci_write, +}; + + +/******** Initialization & exit **********/ + +/* + * Find a PCI domain ID to use for our fake bus. + * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). + */ +static int find_free_domain(void) +{ + int domain = 0xffff; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus)) != NULL) + domain = max_t(int, domain, pci_domain_nr(bus)); + + return domain + 1; +} + +static int find_remapped_devices(struct nvme_remap_dev *nrdev, + struct list_head *resources) +{ + void __iomem *mmio; + int i, count = 0; + u32 cap; + + mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, + pci_resource_len(nrdev->dev, + AHCI_PCI_BAR_STANDARD)); + if (!mmio) + return -ENODEV; + + /* Check if this device might have remapped nvme devices. */ + if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || + !(readl(mmio + AHCI_VSCAP) & 1)) + return -ENODEV; + + cap = readq(mmio + AHCI_REMAP_CAP); + for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { + struct resource *remapped_mem; + + if ((cap & (1 << i)) == 0) + continue; + if (readl(mmio + ahci_remap_dcc(i)) + != PCI_CLASS_STORAGE_EXPRESS) + continue; + + /* We've found a remapped device */ + remapped_mem = &nrdev->remapped_dev_mem[count++]; + remapped_mem->start = + pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) + + ahci_remap_base(i); + remapped_mem->end = remapped_mem->start + + AHCI_REMAP_N_SIZE - 1; + remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; + pci_add_resource(resources, remapped_mem); + } + + pcim_iounmap(nrdev->dev, mmio); + + if (count == 0) + return -ENODEV; + + nrdev->num_remapped_devices = count; + dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", + nrdev->num_remapped_devices); + return 0; +} + +static void nvme_remap_remove_root_bus(void *data) +{ + struct pci_bus *bus = data; + + pci_stop_root_bus(bus); + pci_remove_root_bus(bus); +} + +static int nvme_remap_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct nvme_remap_dev *nrdev; + LIST_HEAD(resources); + int i; + int ret; + struct pci_dev *child; + + nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); + nrdev->sysdata.domain = find_free_domain(); + nrdev->sysdata.nvme_remap_dev = dev; + nrdev->dev = dev; + pci_set_drvdata(dev, nrdev); + + ret = pcim_enable_device(dev); + if (ret < 0) + return ret; + + pci_set_master(dev); + + ret = find_remapped_devices(nrdev, &resources); + if (ret) + return ret; + + /* Add resources from the original AHCI device */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (res->start) { + struct resource *nr_res = &nrdev->ahci_resources[i]; + + nr_res->start = res->start; + nr_res->end = res->end; + nr_res->flags = res->flags; + pci_add_resource(&resources, nr_res); + } + } + + /* Create virtual interrupts */ + nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, + nrdev->num_remapped_devices + 1, + 0); + if (nrdev->irq_base < 0) + return nrdev->irq_base; + + /* Create and populate PCI bus */ + nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, + &nrdev->sysdata, &resources); + if (!nrdev->bus) + return 
-ENODEV; + + if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, + nrdev->bus)) + return -ENOMEM; + + /* We don't support sharing MSI interrupts between these devices */ + nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; + + pci_scan_child_bus(nrdev->bus); + + list_for_each_entry(child, &nrdev->bus->devices, bus_list) { + /* + * Prevent PCI core from trying to move memory BARs around. + * The hidden NVMe devices are at fixed locations. + */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &child->resource[i]; + + if (res->flags & IORESOURCE_MEM) + res->flags |= IORESOURCE_PCI_FIXED; + } + + /* Share the legacy IRQ between all devices */ + child->irq = dev->irq; + } + + pci_assign_unassigned_bus_resources(nrdev->bus); + pci_bus_add_devices(nrdev->bus); + + return 0; +} + +static const struct pci_device_id nvme_remap_ids[] = { + /* + * Match all Intel RAID controllers. + * + * There's overlap here with the set of devices detected by the ahci + * driver, but ahci will only successfully probe when there + * *aren't* any remapped NVMe devices, and this driver will only + * successfully probe when there *are* remapped NVMe devices that + * need handling. + */ + { + PCI_VDEVICE(INTEL, PCI_ANY_ID), + .class = PCI_CLASS_STORAGE_RAID << 8, + .class_mask = 0xffffff00, + }, + {0,} +}; +MODULE_DEVICE_TABLE(pci, nvme_remap_ids); + +static struct pci_driver nvme_remap_drv = { + .name = MODULE_NAME, + .id_table = nvme_remap_ids, + .probe = nvme_remap_probe, +}; +module_pci_driver(nvme_remap_drv); + +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 280cd50d693b..465888670244 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3748,6 +3748,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; } +static bool acs_on_downstream; +static bool acs_on_multifunction; + +#define NUM_ACS_IDS 16 +struct acs_on_id { + unsigned short vendor; + unsigned short device; +}; +static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; +static u8 max_acs_id; + +static __init int pcie_acs_override_setup(char *p) +{ + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "downstream", 10)) + acs_on_downstream = true; + if (!strncmp(p, "multifunction", 13)) + acs_on_multifunction = true; + if (!strncmp(p, "id:", 3)) { + char opt[5]; + int ret; + long val; + + if (max_acs_id >= NUM_ACS_IDS - 1) { + pr_warn("Out of PCIe ACS override slots (%d)\n", + NUM_ACS_IDS); + goto next; + } + + p += 3; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].vendor = val; + + p += strcspn(p, ":"); + if (*p != ':') { + pr_warn("PCIe ACS invalid ID\n"); + goto next; + } + + p++; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].device = val; + max_acs_id++; + } +next: + p += strcspn(p, ","); + if (*p == ',') + p++; + } + + if (acs_on_downstream || acs_on_multifunction || max_acs_id) + pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); + + return 0; +} +early_param("pcie_acs_override", pcie_acs_override_setup); + +static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) +{ + int i; + + /* Never override ACS for legacy devices or devices with ACS caps */ + if (!pci_is_pcie(dev) || + 
pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) + return -ENOTTY; + + for (i = 0; i < max_acs_id; i++) + if (acs_on_ids[i].vendor == dev->vendor && + acs_on_ids[i].device == dev->device) + return 1; + + switch (pci_pcie_type(dev)) { + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_ROOT_PORT: + if (acs_on_downstream) + return 1; + break; + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + if (acs_on_multifunction && dev->multifunction) + return 1; + } + + return -ENOTTY; +} /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. @@ -5195,6 +5295,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, + { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, { 0 } }; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 19d0884479a2..b37d8fcc1205 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1524,4 +1524,6 @@ endif # SCSI_LOWLEVEL source "drivers/scsi/device_handler/Kconfig" +source "drivers/scsi/vhba/Kconfig" + endmenu diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 16de3e41f94c..4e88f6e3e67b 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -152,6 +152,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o obj-$(CONFIG_SCSI_ENCLOSURE) += ses.o obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/ +obj-$(CONFIG_VHBA) += vhba/ # This goes last, so that "real" scsi devices probe earlier obj-$(CONFIG_SCSI_DEBUG) += scsi_debug.o diff --git a/drivers/scsi/vhba/Kconfig b/drivers/scsi/vhba/Kconfig new file mode 100644 index 000000000000..e70a381fe3df --- /dev/null +++ b/drivers/scsi/vhba/Kconfig @@ -0,0 +1,9 @@ +config VHBA + tristate "Virtual (SCSI) Host Bus Adapter" + depends on SCSI + help + This is the in-kernel part of CDEmu, a CD/DVD-ROM device + emulator. + + This driver can also be built as a module. If so, the module + will be called vhba. diff --git a/drivers/scsi/vhba/Makefile b/drivers/scsi/vhba/Makefile new file mode 100644 index 000000000000..560d24689b75 --- /dev/null +++ b/drivers/scsi/vhba/Makefile @@ -0,0 +1,4 @@ +VHBA_VERSION := 20250329 + +obj-$(CONFIG_VHBA) += vhba.o +ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror diff --git a/drivers/scsi/vhba/vhba.c b/drivers/scsi/vhba/vhba.c new file mode 100644 index 000000000000..64b09ece2e5a --- /dev/null +++ b/drivers/scsi/vhba/vhba.c @@ -0,0 +1,1132 @@ +/* + * vhba.c + * + * Copyright (C) 2007-2012 Chia-I Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define pr_fmt(fmt) "vhba: " fmt + +#include + +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#ifdef CONFIG_COMPAT +#include +#endif +#include +#include +#include +#include +#include +#include + + +MODULE_AUTHOR("Chia-I Wu"); +MODULE_VERSION(VHBA_VERSION); +MODULE_DESCRIPTION("Virtual SCSI HBA"); +MODULE_LICENSE("GPL"); + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) +#define sdev_dbg(sdev, fmt, a...) \ + dev_dbg(&(sdev)->sdev_gendev, fmt, ##a) +#define scmd_dbg(scmd, fmt, a...) \ + dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a) +#endif + +#define VHBA_MAX_SECTORS_PER_IO 256 +#define VHBA_MAX_BUS 16 +#define VHBA_MAX_ID 16 +#define VHBA_MAX_DEVICES (VHBA_MAX_BUS * (VHBA_MAX_ID-1)) +#define VHBA_KBUF_SIZE PAGE_SIZE + +#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL) +#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL) + + +static int vhba_can_queue = 32; +module_param_named(can_queue, vhba_can_queue, int, 0); + + +enum vhba_req_state { + VHBA_REQ_FREE, + VHBA_REQ_PENDING, + VHBA_REQ_READING, + VHBA_REQ_SENT, + VHBA_REQ_WRITING, +}; + +struct vhba_command { + struct scsi_cmnd *cmd; + /* metatags are per-host. not to be confused with + queue tags that are usually per-lun */ + unsigned long metatag; + int status; + struct list_head entry; +}; + +struct vhba_device { + unsigned int num; + spinlock_t cmd_lock; + struct list_head cmd_list; + wait_queue_head_t cmd_wq; + atomic_t refcnt; + + unsigned char *kbuf; + size_t kbuf_size; +}; + +struct vhba_host { + struct Scsi_Host *shost; + spinlock_t cmd_lock; + int cmd_next; + struct vhba_command *commands; + spinlock_t dev_lock; + struct vhba_device *devices[VHBA_MAX_DEVICES]; + int num_devices; + DECLARE_BITMAP(chgmap, VHBA_MAX_DEVICES); + int chgtype[VHBA_MAX_DEVICES]; + struct work_struct scan_devices; +}; + +#define MAX_COMMAND_SIZE 16 + +struct vhba_request { + __u32 metatag; + __u32 lun; + __u8 cdb[MAX_COMMAND_SIZE]; + __u8 cdb_len; + __u32 data_len; +}; + +struct vhba_response { + __u32 metatag; + __u32 status; + __u32 data_len; +}; + + + +static struct vhba_command *vhba_alloc_command (void); +static void vhba_free_command (struct vhba_command *vcmd); + +static struct platform_device vhba_platform_device; + + + +/* These functions define a symmetric 1:1 mapping between device numbers and + the bus and id. We have reserved the last id per bus for the host itself. 
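+
+   For example, with VHBA_MAX_ID = 16 there are 15 usable ids per bus:
+   devnum 17 maps to bus 17 / 15 = 1, id 17 % 15 = 2, and
+   bus_and_id_to_devnum(1, 2) = 1 * 15 + 2 = 17 again.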
*/ +static void devnum_to_bus_and_id(unsigned int devnum, unsigned int *bus, unsigned int *id) +{ + *bus = devnum / (VHBA_MAX_ID-1); + *id = devnum % (VHBA_MAX_ID-1); +} + +static unsigned int bus_and_id_to_devnum(unsigned int bus, unsigned int id) +{ + return (bus * (VHBA_MAX_ID-1)) + id; +} + +static struct vhba_device *vhba_device_alloc (void) +{ + struct vhba_device *vdev; + + vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL); + if (!vdev) { + return NULL; + } + + spin_lock_init(&vdev->cmd_lock); + INIT_LIST_HEAD(&vdev->cmd_list); + init_waitqueue_head(&vdev->cmd_wq); + atomic_set(&vdev->refcnt, 1); + + vdev->kbuf = NULL; + vdev->kbuf_size = 0; + + return vdev; +} + +static void vhba_device_put (struct vhba_device *vdev) +{ + if (atomic_dec_and_test(&vdev->refcnt)) { + kfree(vdev); + } +} + +static struct vhba_device *vhba_device_get (struct vhba_device *vdev) +{ + atomic_inc(&vdev->refcnt); + + return vdev; +} + +static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd) +{ + struct vhba_host *vhost; + struct vhba_command *vcmd; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + vcmd = vhba_alloc_command(); + if (!vcmd) { + return SCSI_MLQUEUE_HOST_BUSY; + } + + vcmd->cmd = cmd; + + spin_lock_irqsave(&vdev->cmd_lock, flags); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) + vcmd->metatag = scsi_cmd_to_rq(vcmd->cmd)->tag; +#else + vcmd->metatag = vcmd->cmd->request->tag; +#endif + list_add_tail(&vcmd->entry, &vdev->cmd_list); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + wake_up_interruptible(&vdev->cmd_wq); + + return 0; +} + +static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd) +{ + struct vhba_command *vcmd; + int retval; + unsigned long flags; + + spin_lock_irqsave(&vdev->cmd_lock, flags); + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->cmd == cmd) { + list_del_init(&vcmd->entry); + break; + } + } + + /* command not found */ + if (&vcmd->entry == &vdev->cmd_list) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + return SUCCESS; + } + + while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + scmd_dbg(cmd, "wait for I/O before aborting\n"); + schedule_timeout(1); + spin_lock_irqsave(&vdev->cmd_lock, flags); + } + + retval = (vcmd->status == VHBA_REQ_SENT) ? 
FAILED : SUCCESS; + + vhba_free_command(vcmd); + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return retval; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) +static int vhba_slave_alloc(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + + sdev_dbg(sdev, "enabling tagging (queue depth: %i).\n", sdev->queue_depth); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) + if (!shost_use_blk_mq(shost) && shost->bqt) { +#else + if (shost->bqt) { +#endif + blk_queue_init_tags(sdev->request_queue, sdev->queue_depth, shost->bqt); + } + scsi_adjust_queue_depth(sdev, 0, sdev->queue_depth); + + return 0; +} +#endif + +static void vhba_scan_devices_add (struct vhba_host *vhost, int bus, int id) +{ + struct scsi_device *sdev; + + sdev = scsi_device_lookup(vhost->shost, bus, id, 0); + if (!sdev) { + scsi_add_device(vhost->shost, bus, id, 0); + } else { + dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device %d:%d:0!\n", bus, id); + scsi_device_put(sdev); + } +} + +static void vhba_scan_devices_remove (struct vhba_host *vhost, int bus, int id) +{ + struct scsi_device *sdev; + + sdev = scsi_device_lookup(vhost->shost, bus, id, 0); + if (sdev) { + scsi_remove_device(sdev); + scsi_device_put(sdev); + } else { + dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device %d:%d:0!\n", bus, id); + } +} + +static void vhba_scan_devices (struct work_struct *work) +{ + struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices); + unsigned long flags; + int change, exists; + unsigned int devnum; + unsigned int bus, id; + + for (;;) { + spin_lock_irqsave(&vhost->dev_lock, flags); + + devnum = find_first_bit(vhost->chgmap, VHBA_MAX_DEVICES); + if (devnum >= VHBA_MAX_DEVICES) { + spin_unlock_irqrestore(&vhost->dev_lock, flags); + break; + } + change = vhost->chgtype[devnum]; + exists = vhost->devices[devnum] != NULL; + + vhost->chgtype[devnum] = 0; + clear_bit(devnum, vhost->chgmap); + + spin_unlock_irqrestore(&vhost->dev_lock, flags); + + devnum_to_bus_and_id(devnum, &bus, &id); + + if (change < 0) { + dev_dbg(&vhost->shost->shost_gendev, "trying to remove target %d:%d:0\n", bus, id); + vhba_scan_devices_remove(vhost, bus, id); + } else if (change > 0) { + dev_dbg(&vhost->shost->shost_gendev, "trying to add target %d:%d:0\n", bus, id); + vhba_scan_devices_add(vhost, bus, id); + } else { + /* quick sequence of add/remove or remove/add; we determine + which one it was by checking if device structure exists */ + if (exists) { + /* remove followed by add: remove and (re)add */ + dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target %d:%d:0\n", bus, id); + vhba_scan_devices_remove(vhost, bus, id); + vhba_scan_devices_add(vhost, bus, id); + } else { + /* add followed by remove: no-op */ + dev_dbg(&vhost->shost->shost_gendev, "no-op for target %d:%d:0\n", bus, id); + } + } + } +} + +static int vhba_add_device (struct vhba_device *vdev) +{ + struct vhba_host *vhost; + unsigned int devnum; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + vhba_device_get(vdev); + + spin_lock_irqsave(&vhost->dev_lock, flags); + if (vhost->num_devices >= VHBA_MAX_DEVICES) { + spin_unlock_irqrestore(&vhost->dev_lock, flags); + vhba_device_put(vdev); + return -EBUSY; + } + + for (devnum = 0; devnum < VHBA_MAX_DEVICES; devnum++) { + if (vhost->devices[devnum] == NULL) { + vdev->num = devnum; + vhost->devices[devnum] = vdev; + vhost->num_devices++; + set_bit(devnum, vhost->chgmap); + vhost->chgtype[devnum]++; + 
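+			/* free slot claimed; vhba_scan_devices() will add the SCSI target */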
break; + } + } + spin_unlock_irqrestore(&vhost->dev_lock, flags); + + schedule_work(&vhost->scan_devices); + + return 0; +} + +static int vhba_remove_device (struct vhba_device *vdev) +{ + struct vhba_host *vhost; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + spin_lock_irqsave(&vhost->dev_lock, flags); + set_bit(vdev->num, vhost->chgmap); + vhost->chgtype[vdev->num]--; + vhost->devices[vdev->num] = NULL; + vhost->num_devices--; + spin_unlock_irqrestore(&vhost->dev_lock, flags); + + vhba_device_put(vdev); + + schedule_work(&vhost->scan_devices); + + return 0; +} + +static struct vhba_device *vhba_lookup_device (int devnum) +{ + struct vhba_host *vhost; + struct vhba_device *vdev = NULL; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + if (likely(devnum < VHBA_MAX_DEVICES)) { + spin_lock_irqsave(&vhost->dev_lock, flags); + vdev = vhost->devices[devnum]; + if (vdev) { + vdev = vhba_device_get(vdev); + } + + spin_unlock_irqrestore(&vhost->dev_lock, flags); + } + + return vdev; +} + +static struct vhba_command *vhba_alloc_command (void) +{ + struct vhba_host *vhost; + struct vhba_command *vcmd; + unsigned long flags; + int i; + + vhost = platform_get_drvdata(&vhba_platform_device); + + spin_lock_irqsave(&vhost->cmd_lock, flags); + + vcmd = vhost->commands + vhost->cmd_next++; + if (vcmd->status != VHBA_REQ_FREE) { + for (i = 0; i < vhba_can_queue; i++) { + vcmd = vhost->commands + i; + + if (vcmd->status == VHBA_REQ_FREE) { + vhost->cmd_next = i + 1; + break; + } + } + + if (i == vhba_can_queue) { + vcmd = NULL; + } + } + + if (vcmd) { + vcmd->status = VHBA_REQ_PENDING; + } + + vhost->cmd_next %= vhba_can_queue; + + spin_unlock_irqrestore(&vhost->cmd_lock, flags); + + return vcmd; +} + +static void vhba_free_command (struct vhba_command *vcmd) +{ + struct vhba_host *vhost; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + spin_lock_irqsave(&vhost->cmd_lock, flags); + vcmd->status = VHBA_REQ_FREE; + spin_unlock_irqrestore(&vhost->cmd_lock, flags); +} + +static int vhba_queuecommand (struct Scsi_Host *shost, struct scsi_cmnd *cmd) +{ + struct vhba_device *vdev; + int retval; + unsigned int devnum; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) + scmd_dbg(cmd, "queue %p tag %i\n", cmd, scsi_cmd_to_rq(cmd)->tag); +#else + scmd_dbg(cmd, "queue %p tag %i\n", cmd, cmd->request->tag); +#endif + + devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); + vdev = vhba_lookup_device(devnum); + if (!vdev) { + scmd_dbg(cmd, "no such device\n"); + + cmd->result = DID_NO_CONNECT << 16; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(cmd); +#else + cmd->scsi_done(cmd); +#endif + + return 0; + } + + retval = vhba_device_queue(vdev, cmd); + + vhba_device_put(vdev); + + return retval; +} + +static int vhba_abort (struct scsi_cmnd *cmd) +{ + struct vhba_device *vdev; + int retval = SUCCESS; + unsigned int devnum; + + scmd_dbg(cmd, "abort %p\n", cmd); + + devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); + vdev = vhba_lookup_device(devnum); + if (vdev) { + retval = vhba_device_dequeue(vdev, cmd); + vhba_device_put(vdev); + } else { + cmd->result = DID_NO_CONNECT << 16; + } + + return retval; +} + +static struct scsi_host_template vhba_template = { + .module = THIS_MODULE, + .name = "vhba", + .proc_name = "vhba", + .queuecommand = vhba_queuecommand, + .eh_abort_handler = vhba_abort, + .this_id = -1, + .max_sectors = VHBA_MAX_SECTORS_PER_IO, + .sg_tablesize 
= 256, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) + .slave_alloc = vhba_slave_alloc, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 14, 0) + .tag_alloc_policy_rr = true, +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) + .tag_alloc_policy = BLK_TAG_ALLOC_RR, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + .use_blk_tags = 1, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + .max_segment_size = VHBA_KBUF_SIZE, +#endif +}; + +static ssize_t do_request (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, char __user *buf, size_t buf_len) +{ + struct vhba_request vreq; + ssize_t ret; + + scmd_dbg(cmd, "request %lu (%p), cdb 0x%x, bufflen %d, sg count %d\n", + metatag, cmd, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd)); + + ret = sizeof(vreq); + if (DATA_TO_DEVICE(cmd->sc_data_direction)) { + ret += scsi_bufflen(cmd); + } + + if (ret > buf_len) { + scmd_dbg(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret); + return -EIO; + } + + vreq.metatag = metatag; + vreq.lun = cmd->device->lun; + memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE); + vreq.cdb_len = cmd->cmd_len; + vreq.data_len = scsi_bufflen(cmd); + + if (copy_to_user(buf, &vreq, sizeof(vreq))) { + return -EFAULT; + } + + if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) { + buf += sizeof(vreq); + + if (scsi_sg_count(cmd)) { + unsigned char *kaddr, *uaddr; + struct scatterlist *sglist = scsi_sglist(cmd); + struct scatterlist *sg; + int i; + + uaddr = (unsigned char *) buf; + + for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { + size_t len = sg->length; + + if (len > vdev->kbuf_size) { + scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); + len = vdev->kbuf_size; + } + + kaddr = kmap_atomic(sg_page(sg)); + memcpy(vdev->kbuf, kaddr + sg->offset, len); + kunmap_atomic(kaddr); + + if (copy_to_user(uaddr, vdev->kbuf, len)) { + return -EFAULT; + } + uaddr += len; + } + } else { + if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) { + return -EFAULT; + } + } + } + + return ret; +} + +static ssize_t do_response (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res) +{ + ssize_t ret = 0; + + scmd_dbg(cmd, "response %lu (%p), status %x, data len %d, sg count %d\n", + metatag, cmd, res->status, res->data_len, scsi_sg_count(cmd)); + + if (res->status) { + if (res->data_len > SCSI_SENSE_BUFFERSIZE) { + scmd_dbg(cmd, "truncate sense (%d < %d)", SCSI_SENSE_BUFFERSIZE, res->data_len); + res->data_len = SCSI_SENSE_BUFFERSIZE; + } + + if (copy_from_user(cmd->sense_buffer, buf, res->data_len)) { + return -EFAULT; + } + + cmd->result = res->status; + + ret += res->data_len; + } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) { + size_t to_read; + + if (res->data_len > scsi_bufflen(cmd)) { + scmd_dbg(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len); + res->data_len = scsi_bufflen(cmd); + } + + to_read = res->data_len; + + if (scsi_sg_count(cmd)) { + unsigned char *kaddr, *uaddr; + struct scatterlist *sglist = scsi_sglist(cmd); + struct scatterlist *sg; + int i; + + uaddr = (unsigned char *)buf; + + for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { + size_t len = (sg->length < to_read) ? 
sg->length : to_read; + + if (len > vdev->kbuf_size) { + scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); + len = vdev->kbuf_size; + } + + if (copy_from_user(vdev->kbuf, uaddr, len)) { + return -EFAULT; + } + uaddr += len; + + kaddr = kmap_atomic(sg_page(sg)); + memcpy(kaddr + sg->offset, vdev->kbuf, len); + kunmap_atomic(kaddr); + + to_read -= len; + if (to_read == 0) { + break; + } + } + } else { + if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) { + return -EFAULT; + } + + to_read -= res->data_len; + } + + scsi_set_resid(cmd, to_read); + + ret += res->data_len - to_read; + } + + return ret; +} + +static struct vhba_command *next_command (struct vhba_device *vdev) +{ + struct vhba_command *vcmd; + + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->status == VHBA_REQ_PENDING) { + break; + } + } + + if (&vcmd->entry == &vdev->cmd_list) { + vcmd = NULL; + } + + return vcmd; +} + +static struct vhba_command *match_command (struct vhba_device *vdev, __u32 metatag) +{ + struct vhba_command *vcmd; + + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->metatag == metatag) { + break; + } + } + + if (&vcmd->entry == &vdev->cmd_list) { + vcmd = NULL; + } + + return vcmd; +} + +static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags) +{ + struct vhba_command *vcmd; + DEFINE_WAIT(wait); + + while (!(vcmd = next_command(vdev))) { + if (signal_pending(current)) { + break; + } + + prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE); + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + schedule(); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + } + + finish_wait(&vdev->cmd_wq, &wait); + if (vcmd) { + vcmd->status = VHBA_REQ_READING; + } + + return vcmd; +} + +static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset) +{ + struct vhba_device *vdev; + struct vhba_command *vcmd; + ssize_t ret; + unsigned long flags; + + vdev = file->private_data; + + /* Get next command */ + if (file->f_flags & O_NONBLOCK) { + /* Non-blocking variant */ + spin_lock_irqsave(&vdev->cmd_lock, flags); + vcmd = next_command(vdev); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + if (!vcmd) { + return -EWOULDBLOCK; + } + } else { + /* Blocking variant */ + spin_lock_irqsave(&vdev->cmd_lock, flags); + vcmd = wait_command(vdev, flags); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + if (!vcmd) { + return -ERESTARTSYS; + } + } + + ret = do_request(vdev, vcmd->metatag, vcmd->cmd, buf, buf_len); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + if (ret >= 0) { + vcmd->status = VHBA_REQ_SENT; + *offset += ret; + } else { + vcmd->status = VHBA_REQ_PENDING; + } + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return ret; +} + +static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset) +{ + struct vhba_device *vdev; + struct vhba_command *vcmd; + struct vhba_response res; + ssize_t ret; + unsigned long flags; + + if (buf_len < sizeof(res)) { + return -EIO; + } + + if (copy_from_user(&res, buf, sizeof(res))) { + return -EFAULT; + } + + vdev = file->private_data; + + spin_lock_irqsave(&vdev->cmd_lock, flags); + vcmd = match_command(vdev, res.metatag); + if (!vcmd || vcmd->status != VHBA_REQ_SENT) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + pr_debug("ctl dev #%u not expecting response\n", vdev->num); + return -EIO; + } + vcmd->status = VHBA_REQ_WRITING; + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + ret = 
do_response(vdev, vcmd->metatag, vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + if (ret >= 0) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(vcmd->cmd); +#else + vcmd->cmd->scsi_done(vcmd->cmd); +#endif + ret += sizeof(res); + + /* don't compete with vhba_device_dequeue */ + if (!list_empty(&vcmd->entry)) { + list_del_init(&vcmd->entry); + vhba_free_command(vcmd); + } + } else { + vcmd->status = VHBA_REQ_SENT; + } + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return ret; +} + +static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg) +{ + struct vhba_device *vdev = file->private_data; + struct vhba_host *vhost = platform_get_drvdata(&vhba_platform_device); + + switch (cmd) { + case 0xBEEF001: { + unsigned int ident[4]; /* host, channel, id, lun */ + + ident[0] = vhost->shost->host_no; + devnum_to_bus_and_id(vdev->num, &ident[1], &ident[2]); + ident[3] = 0; /* lun */ + + if (copy_to_user((void *) arg, ident, sizeof(ident))) { + return -EFAULT; + } + + return 0; + } + case 0xBEEF002: { + unsigned int devnum = vdev->num; + + if (copy_to_user((void *) arg, &devnum, sizeof(devnum))) { + return -EFAULT; + } + + return 0; + } + } + + return -ENOTTY; +} + +#ifdef CONFIG_COMPAT +static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg) +{ + unsigned long compat_arg = (unsigned long)compat_ptr(arg); + return vhba_ctl_ioctl(file, cmd, compat_arg); +} +#endif + +static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait) +{ + struct vhba_device *vdev = file->private_data; + unsigned int mask = 0; + unsigned long flags; + + poll_wait(file, &vdev->cmd_wq, wait); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + if (next_command(vdev)) { + mask |= POLLIN | POLLRDNORM; + } + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return mask; +} + +static int vhba_ctl_open (struct inode *inode, struct file *file) +{ + struct vhba_device *vdev; + int retval; + + pr_debug("ctl dev open\n"); + + /* check if vhba is probed */ + if (!platform_get_drvdata(&vhba_platform_device)) { + return -ENODEV; + } + + vdev = vhba_device_alloc(); + if (!vdev) { + return -ENOMEM; + } + + vdev->kbuf_size = VHBA_KBUF_SIZE; + vdev->kbuf = kzalloc(vdev->kbuf_size, GFP_KERNEL); + if (!vdev->kbuf) { + return -ENOMEM; + } + + if (!(retval = vhba_add_device(vdev))) { + file->private_data = vdev; + } + + vhba_device_put(vdev); + + return retval; +} + +static int vhba_ctl_release (struct inode *inode, struct file *file) +{ + struct vhba_device *vdev; + struct vhba_command *vcmd; + unsigned long flags; + + vdev = file->private_data; + + pr_debug("ctl dev release\n"); + + vhba_device_get(vdev); + vhba_remove_device(vdev); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING); + + scmd_dbg(vcmd->cmd, "device released with command %lu (%p)\n", vcmd->metatag, vcmd->cmd); + vcmd->cmd->result = DID_NO_CONNECT << 16; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(vcmd->cmd); +#else + vcmd->cmd->scsi_done(vcmd->cmd); +#endif + vhba_free_command(vcmd); + } + INIT_LIST_HEAD(&vdev->cmd_list); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + kfree(vdev->kbuf); + vdev->kbuf = NULL; + + vhba_device_put(vdev); + + return 0; +} + +static struct file_operations vhba_ctl_fops = { + .owner = THIS_MODULE, + .open = vhba_ctl_open, + .release = 
vhba_ctl_release, + .read = vhba_ctl_read, + .write = vhba_ctl_write, + .poll = vhba_ctl_poll, + .unlocked_ioctl = vhba_ctl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhba_ctl_compat_ioctl, +#endif +}; + +static struct miscdevice vhba_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vhba_ctl", + .fops = &vhba_ctl_fops, +}; + +static int vhba_probe (struct platform_device *pdev) +{ + struct Scsi_Host *shost; + struct vhba_host *vhost; + int i; + + vhba_can_queue = clamp(vhba_can_queue, 1, 256); + + shost = scsi_host_alloc(&vhba_template, sizeof(struct vhba_host)); + if (!shost) { + return -ENOMEM; + } + + shost->max_channel = VHBA_MAX_BUS-1; + shost->max_id = VHBA_MAX_ID; + /* we don't support lun > 0 */ + shost->max_lun = 1; + shost->max_cmd_len = MAX_COMMAND_SIZE; + shost->can_queue = vhba_can_queue; + shost->cmd_per_lun = vhba_can_queue; + + vhost = (struct vhba_host *)shost->hostdata; + memset(vhost, 0, sizeof(struct vhba_host)); + + vhost->shost = shost; + vhost->num_devices = 0; + spin_lock_init(&vhost->dev_lock); + spin_lock_init(&vhost->cmd_lock); + INIT_WORK(&vhost->scan_devices, vhba_scan_devices); + vhost->cmd_next = 0; + vhost->commands = kzalloc(vhba_can_queue * sizeof(struct vhba_command), GFP_KERNEL); + if (!vhost->commands) { + return -ENOMEM; + } + + for (i = 0; i < vhba_can_queue; i++) { + vhost->commands[i].status = VHBA_REQ_FREE; + } + + platform_set_drvdata(pdev, vhost); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + i = scsi_init_shared_tag_map(shost, vhba_can_queue); + if (i) return i; +#endif + + if (scsi_add_host(shost, &pdev->dev)) { + scsi_host_put(shost); + return -ENOMEM; + } + + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) +static int vhba_remove (struct platform_device *pdev) +#else +static void vhba_remove (struct platform_device *pdev) +#endif +{ + struct vhba_host *vhost; + struct Scsi_Host *shost; + + vhost = platform_get_drvdata(pdev); + shost = vhost->shost; + + scsi_remove_host(shost); + scsi_host_put(shost); + + kfree(vhost->commands); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) + return 0; +#endif +} + +static void vhba_release (struct device * dev) +{ + return; +} + +static struct platform_device vhba_platform_device = { + .name = "vhba", + .id = -1, + .dev = { + .release = vhba_release, + }, +}; + +static struct platform_driver vhba_platform_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "vhba", + }, + .probe = vhba_probe, + .remove = vhba_remove, +}; + +static int __init vhba_init (void) +{ + int ret; + + ret = platform_device_register(&vhba_platform_device); + if (ret < 0) { + return ret; + } + + ret = platform_driver_register(&vhba_platform_driver); + if (ret < 0) { + platform_device_unregister(&vhba_platform_device); + return ret; + } + + ret = misc_register(&vhba_miscdev); + if (ret < 0) { + platform_driver_unregister(&vhba_platform_driver); + platform_device_unregister(&vhba_platform_device); + return ret; + } + + return 0; +} + +static void __exit vhba_exit(void) +{ + misc_deregister(&vhba_miscdev); + platform_driver_unregister(&vhba_platform_driver); + platform_device_unregister(&vhba_platform_device); +} + +module_init(vhba_init); +module_exit(vhba_exit); + diff --git a/include/linux/mm.h b/include/linux/mm.h index f0d5be9dc736..f25889adfa43 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -207,6 +207,14 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; +extern bool sysctl_workingset_protection; +extern u8 
sysctl_anon_min_ratio; +extern u8 sysctl_clean_low_ratio; +extern u8 sysctl_clean_min_ratio; +int vm_workingset_protection_update_handler( + const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec442af3f886..9a0364eb5ee1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1376,7 +1376,7 @@ struct readahead_control { ._index = i, \ } -#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) +#define VM_READAHEAD_PAGES (SZ_8M / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); diff --git a/init/Kconfig b/init/Kconfig index fa79feb8fe57..6c8ba1a1a8b9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -194,6 +194,10 @@ config THREAD_INFO_IN_TASK menu "General setup" +config CACHY + bool "Some kernel tweaks by CachyOS" + default y + config BROKEN bool help @@ -1398,6 +1402,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y @@ -1566,6 +1586,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE with the "-O2" compiler flag for best performance and most helpful compile-time warnings. +config CC_OPTIMIZE_FOR_PERFORMANCE_O3 + bool "Optimize more for performance (-O3)" + help + Choosing this option will pass "-O3" to your compiler to optimize + the kernel yet more for performance. + config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size (-Os)" help diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index ce1435cb08b1..e1359db5561e 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -40,6 +40,27 @@ choice on SMP and NUMA systems and exactly dividing by both PAL and NTSC frame rates for video and multimedia work. + config HZ_500 + bool "500 HZ" + help + 500 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_600 + bool "600 HZ" + help + 600 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_750 + bool "750 HZ" + help + 750 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. 
+ config HZ_1000 bool "1000 HZ" help @@ -53,6 +74,9 @@ config HZ default 100 if HZ_100 default 250 if HZ_250 default 300 if HZ_300 + default 500 if HZ_500 + default 600 if HZ_600 + default 750 if HZ_750 default 1000 if HZ_1000 config SCHED_HRTICK diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index da326800c1c9..cf5267cac7bb 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -88,7 +88,7 @@ endchoice config PREEMPT_RT bool "Fully Preemptible Kernel (Real-Time)" - depends on EXPERT && ARCH_SUPPORTS_RT && !COMPILE_TEST + depends on ARCH_SUPPORTS_RT && !COMPILE_TEST select PREEMPTION help This option turns the kernel into a real-time kernel by replacing diff --git a/kernel/fork.c b/kernel/fork.c index b1f3915d5f8e..f779ad112643 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -123,6 +123,16 @@ #include +#ifdef CONFIG_USER_NS +# ifdef CONFIG_USER_NS_UNPRIVILEGED +static int unprivileged_userns_clone = 1; +# else +static int unprivileged_userns_clone = 0; +# endif +#else +#define unprivileged_userns_clone 1 +#endif + /* * Minimum number of threads to boot the kernel */ @@ -2030,6 +2040,11 @@ __latent_entropy struct task_struct *copy_process( return ERR_PTR(-EINVAL); } + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + } + /* * Force any signals received before this point to be delivered * before the fork happens. Collect up signals sent to multiple @@ -3067,6 +3082,10 @@ static int check_unshare_flags(unsigned long unshare_flags) if (!current_is_single_threaded()) return -EINVAL; } + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } return 0; } @@ -3297,6 +3316,15 @@ static const struct ctl_table fork_sysctl_table[] = { .mode = 0644, .proc_handler = sysctl_max_threads, }, +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif }; static int __init init_fork_sysctl(void) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 24df4d98f7d2..1d5923996fa5 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -746,6 +746,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; lockdep_assert_preemption_disabled(); @@ -782,7 +783,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } return state; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3eaeceda71b0..66289244f71e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -76,10 +76,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; * * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds) */ +#ifdef CONFIG_CACHY +unsigned int sysctl_sched_base_slice = 350000ULL; +static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; +#else unsigned int sysctl_sched_base_slice = 700000ULL; static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; +#endif /* CONFIG_CACHY */ +#ifdef CONFIG_CACHY +__read_mostly unsigned int sysctl_sched_migration_cost = 300000UL; +#else __read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; +#endif static int __init setup_sched_thermal_decay_shift(char *str) { @@ -122,8 +131,12 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: 
microseconds) */ +#ifdef CONFIG_CACHY +static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; +#else static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif +#endif #ifdef CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ @@ -1048,7 +1061,7 @@ static bool update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) #include "pelt.h" -static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); +static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu, int sync); static unsigned long task_h_load(struct task_struct *p); static unsigned long capacity_of(int cpu); @@ -7502,6 +7515,24 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p) DEFINE_STATIC_KEY_FALSE(sched_smt_present); EXPORT_SYMBOL_GPL(sched_smt_present); +/* + * Return true if all the CPUs in the SMT core where @cpu belongs are idle, + * false otherwise. + */ +static bool is_idle_core(int cpu) +{ + int sibling; + + if (!sched_smt_active()) + return (available_idle_cpu(cpu) || sched_idle_cpu(cpu)); + + for_each_cpu(sibling, cpu_smt_mask(cpu)) + if (!available_idle_cpu(sibling) && !sched_idle_cpu(sibling)) + return false; + + return true; +} + static inline void set_idle_cores(int cpu, int val) { struct sched_domain_shared *sds; @@ -7770,13 +7801,26 @@ static inline bool asym_fits_cpu(unsigned long util, /* * Try and locate an idle core/thread in the LLC cache domain. */ -static int select_idle_sibling(struct task_struct *p, int prev, int target) +static int select_idle_sibling(struct task_struct *p, int prev, int target, int sync) { bool has_idle_core = false; struct sched_domain *sd; unsigned long task_util, util_min, util_max; int i, recent_used_cpu, prev_aff = -1; + /* Check a recently used CPU as a potential idle candidate: */ + recent_used_cpu = p->recent_used_cpu; + p->recent_used_cpu = prev; + if (recent_used_cpu != prev && + recent_used_cpu != target && + cpus_share_cache(recent_used_cpu, target) && + is_idle_core(recent_used_cpu) && + cpumask_test_cpu(recent_used_cpu, p->cpus_ptr)) { + return recent_used_cpu; + } else { + recent_used_cpu = -1; + } + /* * On asymmetric system, update task utilization because we will check * that the task fits with CPU's capacity. @@ -7793,7 +7837,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ lockdep_assert_irqs_disabled(); - if ((available_idle_cpu(target) || sched_idle_cpu(target)) && + if (sync && is_idle_core(target) && asym_fits_cpu(task_util, util_min, util_max, target)) return target; @@ -7827,24 +7871,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return prev; } - /* Check a recently used CPU as a potential idle candidate: */ - recent_used_cpu = p->recent_used_cpu; - p->recent_used_cpu = prev; - if (recent_used_cpu != prev && - recent_used_cpu != target && - cpus_share_cache(recent_used_cpu, target) && - (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && - cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) && - asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { - - if (!static_branch_unlikely(&sched_cluster_active) || - cpus_share_resources(recent_used_cpu, target)) - return recent_used_cpu; - - } else { - recent_used_cpu = -1; - } - /* * For asymmetric CPU capacity systems, our domain of interest is * sd_asym_cpucapacity rather than sd_llc. 
@@ -8578,7 +8604,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) new_cpu = sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag); } else if (wake_flags & WF_TTWU) { /* XXX always ? */ /* Fast path */ - new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); + /* + * If the previous CPU is an idle core, retain the same for + * cache locality. Otherwise, search for an idle sibling. + */ + if (is_idle_core(prev_cpu)) + new_cpu = prev_cpu; + else + new_cpu = select_idle_sibling(p, prev_cpu, new_cpu, sync); } rcu_read_unlock(); @@ -10262,7 +10295,7 @@ static bool sched_use_asym_prio(struct sched_domain *sd, int cpu) if (!sched_smt_active()) return true; - return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu); + return sd->flags & SD_SHARE_CPUCAPACITY || is_idle_core(cpu); } static inline bool sched_asym(struct sched_domain *sd, int dst_cpu, int src_cpu) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 93fce4bbff5e..3eee6c58d388 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2965,7 +2965,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); -#ifdef CONFIG_PREEMPT_RT +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY) # define SCHED_NR_MIGRATE_BREAK 8 #else # define SCHED_NR_MIGRATE_BREAK 32 diff --git a/mm/Kconfig b/mm/Kconfig index a992f2203eb9..df5f168a3f30 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -435,6 +435,69 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool +config ANON_MIN_RATIO + int "Default value for vm.anon_min_ratio" + depends on SYSCTL + range 0 100 + default 1 + help + This option sets the default value for vm.anon_min_ratio sysctl knob. + + The vm.anon_min_ratio sysctl knob provides *hard* protection of + anonymous pages. The anonymous pages on the current node won't be + reclaimed under any conditions when their amount is below + vm.anon_min_ratio. This knob may be used to prevent excessive swap + thrashing when anonymous memory is low (for example, when memory is + going to be overfilled by compressed data of zram module). + + Setting this value too high (close to MemTotal) can result in + inability to swap and can lead to early OOM under memory pressure. + +config CLEAN_LOW_RATIO + int "Default value for vm.clean_low_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.clean_low_ratio sysctl knob. + + The vm.clean_low_ratio sysctl knob provides *best-effort* + protection of clean file pages. The file pages on the current node + won't be reclaimed under memory pressure when the amount of clean file + pages is below vm.clean_low_ratio *unless* we threaten to OOM. + Protection of clean file pages using this knob may be used when + swapping is still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory + pressure. + + Setting it to a high value may result in a early eviction of anonymous + pages into the swap space by attempting to hold the protected amount + of clean file pages in memory. + +config CLEAN_MIN_RATIO + int "Default value for vm.clean_min_ratio" + depends on SYSCTL + range 0 100 + default 4 + help + This option sets the default value for vm.clean_min_ratio sysctl knob. + + The vm.clean_min_ratio sysctl knob provides *hard* protection of + clean file pages. 
The file pages on the current node won't be + reclaimed under memory pressure when the amount of clean file pages is + below vm.clean_min_ratio. Hard protection of clean file pages using + this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free + swap space; + - improve performance in disk cache-bound tasks under memory + pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + + Setting it to a high value may result in an early out-of-memory condition + due to the inability to reclaim the protected amount of clean file pages + when other types of pages cannot be reclaimed. + config HAVE_MEMBLOCK_PHYS_MAP bool @@ -631,7 +694,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION - default 0 if PREEMPT_RT + default 0 if PREEMPT_RT || CACHY default 1 # diff --git a/mm/compaction.c b/mm/compaction.c index 1e8f8eca318c..32326c48162b 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1887,7 +1887,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE * aggressively the kernel should compact memory in the * background. It takes values in the range [0, 100]. */ +#ifdef CONFIG_CACHY +static unsigned int __read_mostly sysctl_compaction_proactiveness; +#else static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +#endif static int sysctl_extfrag_threshold = 500; static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 40cf59301c21..50a3a2165bcf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -63,7 +63,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1<may_unmap && folio_mapped(folio)) goto keep_locked; + if (folio_is_file_lru(folio) ? sc->clean_below_min : + (sc->anon_below_min && !sc->clean_below_min)) + goto keep_locked; + /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2572,6 +2598,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } + /* + * Force-scan anon if clean file pages are under vm.clean_low_ratio + * or vm.clean_min_ratio. + */ + if (sc->clean_below_low || sc->clean_below_min) { + scan_balance = SCAN_ANON; + goto out; + } + /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working set right now to make sure @@ -2630,6 +2665,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } + /* + * Hard protection of the working set. + * Don't reclaim anon/file pages when the amount is + * below the watermark of the same type. + */ + if (file ?
sc->clean_below_min : sc->anon_below_min) + scan = 0; + nr[lru] = scan; } } @@ -2650,6 +2693,96 @@ static bool can_age_anon_pages(struct lruvec *lruvec, lruvec_memcg(lruvec)); } +int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + workingset_protection_prev_totalram = 0; + + return 0; +} + +static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long node_mem_total; + struct sysinfo i; + + if (!sysctl_workingset_protection) { + sc->anon_below_min = 0; + sc->clean_below_low = 0; + sc->clean_below_min = 0; + return; + } + + if (likely(sysctl_anon_min_ratio || + sysctl_clean_low_ratio || + sysctl_clean_min_ratio)) { +#ifdef CONFIG_NUMA + si_meminfo_node(&i, pgdat->node_id); +#else //CONFIG_NUMA + si_meminfo(&i); +#endif //CONFIG_NUMA + node_mem_total = i.totalram; + + if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { + sysctl_anon_min_ratio_kb = + node_mem_total * sysctl_anon_min_ratio / 100; + sysctl_clean_low_ratio_kb = + node_mem_total * sysctl_clean_low_ratio / 100; + sysctl_clean_min_ratio_kb = + node_mem_total * sysctl_clean_min_ratio / 100; + workingset_protection_prev_totalram = node_mem_total; + } + } + + /* + * Check the number of anonymous pages to protect them from + * reclaiming if their amount is below the specified threshold. + */ + if (sysctl_anon_min_ratio) { + unsigned long reclaimable_anon; + + reclaimable_anon = + node_page_state(pgdat, NR_ACTIVE_ANON) + + node_page_state(pgdat, NR_INACTIVE_ANON) + + node_page_state(pgdat, NR_ISOLATED_ANON); + + sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; + } else + sc->anon_below_min = 0; + + /* + * Check the number of clean file pages to protect them from + * reclaiming if their amount is below the specified threshold. + */ + if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { + unsigned long reclaimable_file, dirty, clean; + + reclaimable_file = + node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE) + + node_page_state(pgdat, NR_ISOLATED_FILE); + dirty = node_page_state(pgdat, NR_FILE_DIRTY); + /* + * node_page_state() sum can go out of sync since + * all the values are not read at once.
+ */ + if (likely(reclaimable_file > dirty)) + clean = reclaimable_file - dirty; + else + clean = 0; + + sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; + sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; + } else { + sc->clean_below_low = 0; + sc->clean_below_min = 0; + } +} + #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4634,11 +4767,21 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) { struct ctrl_pos sp, pv; - if (swappiness <= MIN_SWAPPINESS + 1) + if (swappiness == MIN_SWAPPINESS) + return LRU_GEN_FILE; + + if (sc->clean_below_min) + return LRU_GEN_ANON; + if (sc->anon_below_min) + return LRU_GEN_FILE; + if (sc->clean_below_low) + return LRU_GEN_ANON; + + if (swappiness == MIN_SWAPPINESS + 1) return LRU_GEN_FILE; if (swappiness >= MAX_SWAPPINESS) @@ -4658,7 +4801,7 @@ static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec, int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, swappiness); + int type = get_type_to_scan(lruvec, sc, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -4904,6 +5047,12 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); + prepare_workingset_protection(pgdat, sc); + + if (sysctl_workingset_protection && sc->clean_below_min && + !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) + return 0; + /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; @@ -6058,6 +6207,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); + prepare_workingset_protection(pgdat, sc); + shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 0906a02de885..b1978874b555 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -242,6 +242,12 @@ ifdef CONFIG_LTO_CLANG cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) endif +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Save the _c_flags silently.
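+# The saved flags are read back by scripts/Makefile.thinlto, which recompiles +# each LTO bitcode object into a native object with the same front-end flags.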
+quiet_cmd_save_c_flags = + cmd_save_c_flags = printf '\n%s\n' 'saved_c_flags_$@ := $(call escsq,$(_c_flags))' >> $(dot-target).cmd +endif + quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< \ $(cmd_ld_single) \ @@ -249,6 +255,7 @@ quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ define rule_cc_o_c $(call cmd_and_fixdep,cc_o_c) + $(call cmd,save_c_flags) $(call cmd,checksrc) $(call cmd,checkdoc) $(call cmd,gen_objtooldep) diff --git a/scripts/Makefile.thinlto b/scripts/Makefile.thinlto new file mode 100644 index 000000000000..03349ac69de5 --- /dev/null +++ b/scripts/Makefile.thinlto @@ -0,0 +1,40 @@ +PHONY := __default +__default: + +include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib + +native-objs := $(patsubst %.o,%.thinlto-native.o,$(call read-file, vmlinux.thinlto-index)) + +__default: $(native-objs) + +# Generate .thinlto-native.o (obj) from .o (bitcode) and .thinlto.bc (summary) files +# --------------------------------------------------------------------------- +quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ + be_flags = $(shell sed -n '/saved_c_flags_/s/.*:= //p' \ + $(dir $(<)).$(notdir $(<)).cmd) + cmd_cc_o_bc = \ + $(CC) $(be_flags) -x ir -fno-lto -Wno-unused-command-line-argument \ + -fthinlto-index=$(word 2, $^) -c -o $@ $< + +targets += $(native-objs) +$(native-objs): %.thinlto-native.o: %.o %.o.thinlto.bc FORCE + $(call if_changed,cc_o_bc) + +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. +# --------------------------------------------------------------------------- + +PHONY += FORCE +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. 
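+# +# Note that cmd_cc_o_bc above does not take the saved flags from this include; +# it extracts the saved_c_flags_* line from the bitcode object's own .cmd file +# with sed, since that file belongs to a prerequisite rather than a target here.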
+ +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + +.PHONY: $(PHONY) diff --git a/scripts/Makefile.vmlinux_a b/scripts/Makefile.vmlinux_a new file mode 100644 index 000000000000..382cebe1ee57 --- /dev/null +++ b/scripts/Makefile.vmlinux_a @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PHONY := __default +__default: vmlinux.a + +include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib + +# Link of built-in-fixup.a +# --------------------------------------------------------------------------- + +# '$(AR) mPi' needs --thin to workaround the bug of llvm-ar <= 14 +quiet_cmd_ar_builtin_fixup = AR $@ + cmd_ar_builtin_fixup = \ + rm -f $@; \ + $(AR) cDPrS --thin $@ $(KBUILD_VMLINUX_OBJS); \ + $(AR) mPi --thin $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + +targets += built-in-fixup.a +built-in-fixup.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE + $(call if_changed,ar_builtin_fixup) + +ifdef CONFIG_LTO_CLANG_THIN_DIST + +quiet_cmd_builtin.order = GEN $@ + cmd_builtin.order = $(AR) t $< > $@ + +targets += builtin.order +builtin.order: built-in-fixup.a FORCE + $(call if_changed,builtin.order) + +quiet_cmd_ld_thinlto_index = LD $@ + cmd_ld_thinlto_index = \ + $(LD) $(KBUILD_LDFLAGS) -r --thinlto-index-only=$@ @$< + +targets += vmlinux.thinlto-index +vmlinux.thinlto-index: builtin.order FORCE + $(call if_changed,ld_thinlto_index) + +quiet_cmd_ar_vmlinux.a = GEN $@ + cmd_ar_vmlinux.a = \ + rm -f $@; \ + while read -r obj; do \ + if grep -q $${obj} $(word 2, $^); then \ + echo $${obj%.o}.thinlto-native.o; \ + else \ + echo $${obj}; \ + fi; \ + done < $< | xargs $(AR) cDPrS --thin $@ + +targets += vmlinux.a +vmlinux.a: builtin.order vmlinux.thinlto-index FORCE + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.thinlto + $(call if_changed,ar_vmlinux.a) + +else + +# vmlinux.a +# --------------------------------------------------------------------------- + +targets += vmlinux.a +vmlinux.a: built-in-fixup.a FORCE + $(call if_changed,copy) + +endif + +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. +# --------------------------------------------------------------------------- + +PHONY += FORCE +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + +.PHONY: $(PHONY) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 755b842f1f9b..7d9881a2243c 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1478,13 +1478,22 @@ static void extract_crcs_for_object(const char *object, struct module *mod) char cmd_file[PATH_MAX]; char *buf, *p; const char *base; - int dirlen, ret; + int dirlen, baselen_without_suffix, ret; base = get_basename(object); dirlen = base - object; - ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%s.cmd", - dirlen, object, base); + baselen_without_suffix = strlen(object) - dirlen - strlen(".o"); + + /* + * When CONFIG_LTO_CLANG_THIN_DIST=y, the ELF is *.thinlto-native.o + * but the symbol CRCs are recorded in *.o.cmd file. 
+ */ + if (strends(object, ".thinlto-native.o")) + baselen_without_suffix -= strlen(".thinlto-native"); + + ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%.*s.o.cmd", + dirlen, object, baselen_without_suffix, base); if (ret >= sizeof(cmd_file)) { error("%s: too long path was truncated\n", cmd_file); return; diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index 452374d63c24..08f80d7c5df0 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -90,6 +90,11 @@ _package-headers() { "${srctree}/scripts/package/install-extmod-build" "${builddir}" fi + # required when DEBUG_INFO_BTF_MODULES is enabled + if [ -f tools/bpf/resolve_btfids/resolve_btfids ]; then + install -Dt "$builddir/tools/bpf/resolve_btfids" tools/bpf/resolve_btfids/resolve_btfids + fi + echo "Installing System.map and config..." mkdir -p "${builddir}" cp System.map "${builddir}/System.map" -- 2.52.0 From 44e15a4dc9cebf584c56a3e0f2c627a7f18e7e56 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:31 +0800 Subject: [PATCH 5/9] fixes Signed-off-by: Eric Naim --- arch/arm/include/asm/mmu_context.h | 2 +- arch/riscv/include/asm/sync_core.h | 2 +- arch/s390/include/asm/mmu_context.h | 2 +- arch/sparc/include/asm/mmu_context_64.h | 2 +- arch/x86/include/asm/mmu_context.h | 23 +++++++++- arch/x86/include/asm/sync_core.h | 2 +- arch/x86/kernel/cpu/amd.c | 6 +++ arch/x86/mm/tlb.c | 21 --------- drivers/android/binder/thread.rs | 59 ++++++++++++++----------- drivers/bluetooth/btusb.c | 2 + drivers/gpu/drm/i915/gt/intel_rc6.c | 13 +++++- include/linux/perf_event.h | 2 +- include/linux/sched/mm.h | 10 ++--- include/linux/tick.h | 4 +- include/linux/vtime.h | 8 ++-- kernel/rcu/tree.c | 12 +++++ kernel/sched/core.c | 19 +++----- kernel/sched/ext.c | 4 +- kernel/sched/fair.c | 11 ++--- kernel/sched/sched.h | 24 +++++----- 20 files changed, 131 insertions(+), 97 deletions(-) diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index db2cb06aa8cf..bebde469f81a 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -80,7 +80,7 @@ static inline void check_and_switch_context(struct mm_struct *mm, #ifndef MODULE #define finish_arch_post_lock_switch \ finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) +static __always_inline void finish_arch_post_lock_switch(void) { struct mm_struct *mm = current->mm; diff --git a/arch/riscv/include/asm/sync_core.h b/arch/riscv/include/asm/sync_core.h index 9153016da8f1..2fe6b7fe6b12 100644 --- a/arch/riscv/include/asm/sync_core.h +++ b/arch/riscv/include/asm/sync_core.h @@ -6,7 +6,7 @@ * RISC-V implements return to user-space through an xRET instruction, * which is not core serializing. 
*/ -static inline void sync_core_before_usermode(void) +static __always_inline void sync_core_before_usermode(void) { asm volatile ("fence.i" ::: "memory"); } diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d9b8501bc93d..c124ef6a01b3 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -97,7 +97,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #define finish_arch_post_lock_switch finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) +static __always_inline void finish_arch_post_lock_switch(void) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 78bbacc14d2d..d1967214ef25 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -160,7 +160,7 @@ static inline void arch_start_context_switch(struct task_struct *prev) } #define finish_arch_post_lock_switch finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) +static __always_inline void finish_arch_post_lock_switch(void) { /* Restore the state of MCDPER register for the new process * just switched to. diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 73bf3b1b44e8..ecd134dcfb34 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -136,8 +136,29 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) } #endif +/* + * Please ignore the name of this function. It should be called + * switch_to_kernel_thread(). + * + * enter_lazy_tlb() is a hint from the scheduler that we are entering a + * kernel thread or other context without an mm. Acceptable implementations + * include doing nothing whatsoever, switching to init_mm, or various clever + * lazy tricks to try to minimize TLB flushes. + * + * The scheduler reserves the right to call enter_lazy_tlb() several times + * in a row. It will notify us that we're going back to a real mm by + * calling switch_mm_irqs_off(). + */ #define enter_lazy_tlb enter_lazy_tlb -extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +#ifndef MODULE +static __always_inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) + return; + + this_cpu_write(cpu_tlbstate_shared.is_lazy, true); +} +#endif #define mm_init_global_asid mm_init_global_asid extern void mm_init_global_asid(struct mm_struct *mm); diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index 96bda43538ee..4b55fa353bb5 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -93,7 +93,7 @@ static __always_inline void sync_core(void) * to user-mode. x86 implements return to user-space through sysexit, * sysrel, and sysretq, which are not core serializing. */ -static inline void sync_core_before_usermode(void) +static __always_inline void sync_core_before_usermode(void) { /* With PTI, we unconditionally serialize before running user code. 
*/ if (static_cpu_has(X86_FEATURE_PTI)) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bc94ff1e250a..fcc1a954ccb3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1035,6 +1035,7 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) } } +#ifndef CONFIG_CACHY static const struct x86_cpu_id zen5_rdseed_microcode[] = { ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), @@ -1056,6 +1057,11 @@ static void init_amd_zen5(struct cpuinfo_x86 *c) pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n"); } } +#else +static void init_amd_zen5(struct cpuinfo_x86 *c) +{ +} +#endif /* !CONFIG_CACHY */ static void init_amd(struct cpuinfo_x86 *c) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index f5b93e01e347..71abaf0bdb91 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -971,27 +971,6 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, } } -/* - * Please ignore the name of this function. It should be called - * switch_to_kernel_thread(). - * - * enter_lazy_tlb() is a hint from the scheduler that we are entering a - * kernel thread or other context without an mm. Acceptable implementations - * include doing nothing whatsoever, switching to init_mm, or various clever - * lazy tricks to try to minimize TLB flushes. - * - * The scheduler reserves the right to call enter_lazy_tlb() several times - * in a row. It will notify us that we're going back to a real mm by - * calling switch_mm_irqs_off(). - */ -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ - if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) - return; - - this_cpu_write(cpu_tlbstate_shared.is_lazy, true); -} - /* * Using a temporary mm allows to set temporary mappings that are not accessible * by other CPUs. Such mappings are needed to perform sensitive memory writes diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs index 1a8e6fdc0dc4..dcd47e10aeb8 100644 --- a/drivers/android/binder/thread.rs +++ b/drivers/android/binder/thread.rs @@ -69,17 +69,24 @@ struct ScatterGatherEntry { } /// This entry specifies that a fixup should happen at `target_offset` of the -/// buffer. If `skip` is nonzero, then the fixup is a `binder_fd_array_object` -/// and is applied later. Otherwise if `skip` is zero, then the size of the -/// fixup is `size_of::<u64>()` and `pointer_value` is written to the buffer. -struct PointerFixupEntry { - /// The number of bytes to skip, or zero for a `binder_buffer_object` fixup. - skip: usize, - /// The translated pointer to write when `skip` is zero. - pointer_value: u64, - /// The offset at which the value should be written. The offset is relative - /// to the original buffer. - target_offset: usize, +/// buffer. +enum PointerFixupEntry { + /// A fixup for a `binder_buffer_object`. + Fixup { + /// The translated pointer to write. + pointer_value: u64, + /// The offset at which the value should be written. The offset is relative + /// to the original buffer. + target_offset: usize, + }, + /// A skip for a `binder_fd_array_object`. + Skip { + /// The number of bytes to skip. + skip: usize, + /// The offset at which the skip should happen. The offset is relative + /// to the original buffer. + target_offset: usize, + }, } /// Return type of `apply_and_validate_fixup_in_parent`.
@@ -762,8 +769,7 @@ fn translate_object( parent_entry.fixup_min_offset = info.new_min_offset; parent_entry.pointer_fixups.push( - PointerFixupEntry { - skip: 0, + PointerFixupEntry::Fixup { pointer_value: buffer_ptr_in_user_space, target_offset: info.target_offset, }, @@ -807,9 +813,8 @@ fn translate_object( parent_entry .pointer_fixups .push( - PointerFixupEntry { + PointerFixupEntry::Skip { skip: fds_len, - pointer_value: 0, target_offset: info.target_offset, }, GFP_KERNEL, @@ -871,17 +876,21 @@ fn apply_sg(&self, alloc: &mut Allocation, sg_state: &mut ScatterGatherState) -> let mut reader = UserSlice::new(UserPtr::from_addr(sg_entry.sender_uaddr), sg_entry.length).reader(); for fixup in &mut sg_entry.pointer_fixups { - let fixup_len = if fixup.skip == 0 { - size_of::<u64>() - } else { - fixup.skip + let (fixup_len, fixup_offset) = match fixup { + PointerFixupEntry::Fixup { target_offset, .. } => { + (size_of::<u64>(), *target_offset) + } + PointerFixupEntry::Skip { + skip, + target_offset, + } => (*skip, *target_offset), }; - let target_offset_end = fixup.target_offset.checked_add(fixup_len).ok_or(EINVAL)?; - if fixup.target_offset < end_of_previous_fixup || offset_end < target_offset_end { + let target_offset_end = fixup_offset.checked_add(fixup_len).ok_or(EINVAL)?; + if fixup_offset < end_of_previous_fixup || offset_end < target_offset_end { pr_warn!( "Fixups oob {} {} {} {}", - fixup.target_offset, + fixup_offset, end_of_previous_fixup, offset_end, target_offset_end @@ -890,13 +899,13 @@ fn apply_sg(&self, alloc: &mut Allocation, sg_state: &mut ScatterGatherState) -> } let copy_off = end_of_previous_fixup; - let copy_len = fixup.target_offset - end_of_previous_fixup; + let copy_len = fixup_offset - end_of_previous_fixup; if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) { pr_warn!("Failed copying into alloc: {:?}", err); return Err(err.into()); } - if fixup.skip == 0 { - let res = alloc.write::<u64>(fixup.target_offset, &fixup.pointer_value); + if let PointerFixupEntry::Fixup { pointer_value, ..
} = fixup { + let res = alloc.write::<u64>(fixup_offset, pointer_value); if let Err(err) = res { pr_warn!("Failed copying ptr into alloc: {:?}", err); return Err(err.into()); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index ded09e94d296..e9c672e7846a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -557,6 +557,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3592), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe112), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe122), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 286d49ecc449..472c68b32516 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -2,9 +2,9 @@ /* * Copyright © 2019 Intel Corporation */ - #include #include +#include <linux/dmi.h> #include @@ -497,6 +497,17 @@ static bool rc6_supported(struct intel_rc6 *rc6) if (!HAS_RC6(i915)) return false; + if (dmi_match(DMI_PRODUCT_SKU, "IBP1XI08MK1") || + dmi_match(DMI_PRODUCT_SKU, "IBP14I08MK2") || + dmi_match(DMI_PRODUCT_SKU, "IBP1XI08MK2") || + dmi_match(DMI_PRODUCT_SKU, "IBP16I08MK2") || + dmi_match(DMI_PRODUCT_SKU, "OMNIA08IMK1") || + dmi_match(DMI_PRODUCT_SKU, "OMNIA08IMK2")) { + drm_notice(&i915->drm, + "RC6 disabled by quirk\n"); + return false; + } + if (intel_vgpu_active(i915)) return false; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ded2e582c60..fbe94b65b30a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1624,7 +1624,7 @@ static inline void perf_event_task_migrate(struct task_struct *task) task->sched_migrated = 1; } -static inline void perf_event_task_sched_in(struct task_struct *prev, +static __always_inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { if (static_branch_unlikely(&perf_sched_events)) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 95d0040df584..4a279ee2d026 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -44,7 +44,7 @@ static inline void smp_mb__after_mmgrab(void) extern void __mmdrop(struct mm_struct *mm); -static inline void mmdrop(struct mm_struct *mm) +static __always_inline void mmdrop(struct mm_struct *mm) { /* * The implicit full barrier implied by atomic_dec_and_test() is @@ -71,14 +71,14 @@ static inline void __mmdrop_delayed(struct rcu_head *rhp) * Invoked from finish_task_switch(). Delegates the heavy lifting on RT * kernels via RCU. */ -static inline void mmdrop_sched(struct mm_struct *mm) +static __always_inline void mmdrop_sched(struct mm_struct *mm) { /* Provides a full memory barrier.
See mmdrop() */ if (atomic_dec_and_test(&mm->mm_count)) call_rcu(&mm->delayed_drop, __mmdrop_delayed); } #else -static inline void mmdrop_sched(struct mm_struct *mm) +static __always_inline void mmdrop_sched(struct mm_struct *mm) { mmdrop(mm); } @@ -104,7 +104,7 @@ static inline void mmdrop_lazy_tlb(struct mm_struct *mm) } } -static inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm) +static __always_inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmdrop_sched(mm); @@ -532,7 +532,7 @@ enum { #include #endif -static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) +static __always_inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { /* * The atomic_read() below prevents CSE. The following should diff --git a/include/linux/tick.h b/include/linux/tick.h index ac76ae9fa36d..fce16aa10ba2 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -175,7 +175,7 @@ extern cpumask_var_t tick_nohz_full_mask; #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; -static inline bool tick_nohz_full_enabled(void) +static __always_inline bool tick_nohz_full_enabled(void) { if (!context_tracking_enabled()) return false; @@ -299,7 +299,7 @@ static inline void __tick_nohz_task_switch(void) { } static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { } #endif -static inline void tick_nohz_task_switch(void) +static __always_inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) __tick_nohz_task_switch(); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 29dd5b91dd7d..428464bb81b3 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -67,24 +67,24 @@ static __always_inline void vtime_account_guest_exit(void) * For now vtime state is tied to context tracking. We might want to decouple * those later if necessary. */ -static inline bool vtime_accounting_enabled(void) +static __always_inline bool vtime_accounting_enabled(void) { return context_tracking_enabled(); } -static inline bool vtime_accounting_enabled_cpu(int cpu) +static __always_inline bool vtime_accounting_enabled_cpu(int cpu) { return context_tracking_enabled_cpu(cpu); } -static inline bool vtime_accounting_enabled_this_cpu(void) +static __always_inline bool vtime_accounting_enabled_this_cpu(void) { return context_tracking_enabled_this_cpu(); } extern void vtime_task_switch_generic(struct task_struct *prev); -static inline void vtime_task_switch(struct task_struct *prev) +static __always_inline void vtime_task_switch(struct task_struct *prev) { if (vtime_accounting_enabled_this_cpu()) vtime_task_switch_generic(prev); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 293bbd9ac3f4..2c1c9759e278 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -160,6 +160,7 @@ static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp, unsigned long gps, unsigned long flags); static void invoke_rcu_core(void); static void rcu_report_exp_rdp(struct rcu_data *rdp); +static void rcu_report_qs_rdp(struct rcu_data *rdp); static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp); static bool rcu_rdp_is_offloaded(struct rcu_data *rdp); static bool rcu_rdp_cpu_online(struct rcu_data *rdp); @@ -1983,6 +1984,17 @@ static noinline_for_stack bool rcu_gp_init(void) if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) on_each_cpu(rcu_strict_gp_boundary, NULL, 0); + /* + * Immediately report QS for the GP kthread's CPU. 
The GP kthread + * cannot be in an RCU read-side critical section while running + * the FQS scan. This eliminates the need for a second FQS wait + * when all CPUs are idle. + */ + preempt_disable(); + rcu_qs(); + rcu_report_qs_rdp(this_cpu_ptr(&rcu_data)); + preempt_enable(); + return true; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 045f83ad261e..e51f38edb32e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -678,11 +678,6 @@ bool raw_spin_rq_trylock(struct rq *rq) } } -void raw_spin_rq_unlock(struct rq *rq) -{ - raw_spin_unlock(rq_lockp(rq)); -} - /* * double_rq_lock - safely lock two runqueues */ @@ -4869,7 +4864,7 @@ static inline void prepare_task(struct task_struct *next) WRITE_ONCE(next->on_cpu, 1); } -static inline void finish_task(struct task_struct *prev) +static __always_inline void finish_task(struct task_struct *prev) { /* * This must be the very last reference to @prev from this CPU. After @@ -4885,7 +4880,7 @@ static inline void finish_task(struct task_struct *prev) smp_store_release(&prev->on_cpu, 0); } -static void do_balance_callbacks(struct rq *rq, struct balance_callback *head) +static __always_inline void do_balance_callbacks(struct rq *rq, struct balance_callback *head) { void (*func)(struct rq *rq); struct balance_callback *next; @@ -4920,7 +4915,7 @@ struct balance_callback balance_push_callback = { .func = balance_push, }; -static inline struct balance_callback * +static __always_inline struct balance_callback * __splice_balance_callbacks(struct rq *rq, bool split) { struct balance_callback *head = rq->balance_callback; @@ -4950,7 +4945,7 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq) return __splice_balance_callbacks(rq, true); } -void __balance_callbacks(struct rq *rq, struct rq_flags *rf) +__always_inline void __balance_callbacks(struct rq *rq, struct rq_flags *rf) { if (rf) rq_unpin_lock(rq, rf); @@ -4987,7 +4982,7 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf #endif } -static inline void finish_lock_switch(struct rq *rq) +static __always_inline void finish_lock_switch(struct rq *rq) { /* * If we are tracking spinlock dependencies then we have to @@ -5019,7 +5014,7 @@ static inline void kmap_local_sched_out(void) #endif } -static inline void kmap_local_sched_in(void) +static __always_inline void kmap_local_sched_in(void) { #ifdef CONFIG_KMAP_LOCAL if (unlikely(current->kmap_ctrl.idx)) @@ -5072,7 +5067,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, * past. 'prev == current' is still correct but we need to recalculate this_rq * because prev may have moved to another CPU. */ -static struct rq *finish_task_switch(struct task_struct *prev) +static __always_inline struct rq *finish_task_switch(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index afe28c04d5aa..a661aa8c1b1d 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3440,8 +3440,8 @@ static void destroy_dsq(struct scx_sched *sch, u64 dsq_id) * operations inside scheduler locks. 
*/ dsq->id = SCX_DSQ_INVALID; - llist_add(&dsq->free_node, &dsqs_to_free); - irq_work_queue(&free_dsq_irq_work); + if (llist_add(&dsq->free_node, &dsqs_to_free)) + irq_work_queue(&free_dsq_irq_work); out_unlock_dsq: raw_spin_unlock_irqrestore(&dsq->lock, flags); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 66289244f71e..3263819effb7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2471,11 +2471,8 @@ static void task_numa_find_cpu(struct task_numa_env *env, maymove = !load_too_imbalanced(src_load, dst_load, env); } - for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { - /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, env->p->cpus_ptr)) - continue; - + /* Skip CPUs if the source task cannot migrate */ + for_each_cpu_and(cpu, cpumask_of_node(env->dst_nid), env->p->cpus_ptr) { env->dst_cpu = cpu; if (task_numa_compare(env, taskimp, groupimp, maymove)) break; @@ -8391,9 +8388,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) int max_spare_cap_cpu = -1; int fits, max_fits = -1; - cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask); - - if (cpumask_empty(cpus)) + if (!cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask)) continue; /* Account external pressure for the energy estimation */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3eee6c58d388..52ad9cdf7585 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1403,12 +1403,12 @@ static inline struct cpumask *sched_group_span(struct sched_group *sg); DECLARE_STATIC_KEY_FALSE(__sched_core_enabled); -static inline bool sched_core_enabled(struct rq *rq) +static __always_inline bool sched_core_enabled(struct rq *rq) { return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; } -static inline bool sched_core_disabled(void) +static __always_inline bool sched_core_disabled(void) { return !static_branch_unlikely(&__sched_core_enabled); } @@ -1417,7 +1417,7 @@ static inline bool sched_core_disabled(void) * Be careful with this function; not for general use. The return value isn't * stable unless you actually hold a relevant rq->__lock. 
*/ -static inline raw_spinlock_t *rq_lockp(struct rq *rq) +static __always_inline raw_spinlock_t *rq_lockp(struct rq *rq) { if (sched_core_enabled(rq)) return &rq->core->__lock; @@ -1425,7 +1425,7 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq) return &rq->__lock; } -static inline raw_spinlock_t *__rq_lockp(struct rq *rq) +static __always_inline raw_spinlock_t *__rq_lockp(struct rq *rq) { if (rq->core_enabled) return &rq->core->__lock; @@ -1519,12 +1519,12 @@ static inline bool sched_core_disabled(void) return true; } -static inline raw_spinlock_t *rq_lockp(struct rq *rq) +static __always_inline raw_spinlock_t *rq_lockp(struct rq *rq) { return &rq->__lock; } -static inline raw_spinlock_t *__rq_lockp(struct rq *rq) +static __always_inline raw_spinlock_t *__rq_lockp(struct rq *rq) { return &rq->__lock; } @@ -1573,20 +1573,24 @@ static inline void lockdep_assert_rq_held(struct rq *rq) extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); extern bool raw_spin_rq_trylock(struct rq *rq); -extern void raw_spin_rq_unlock(struct rq *rq); -static inline void raw_spin_rq_lock(struct rq *rq) +static __always_inline void raw_spin_rq_lock(struct rq *rq) { raw_spin_rq_lock_nested(rq, 0); } -static inline void raw_spin_rq_lock_irq(struct rq *rq) +static __always_inline void raw_spin_rq_unlock(struct rq *rq) +{ + raw_spin_unlock(rq_lockp(rq)); +} + +static __always_inline void raw_spin_rq_lock_irq(struct rq *rq) { local_irq_disable(); raw_spin_rq_lock(rq); } -static inline void raw_spin_rq_unlock_irq(struct rq *rq) +static __always_inline void raw_spin_rq_unlock_irq(struct rq *rq) { raw_spin_rq_unlock(rq); local_irq_enable(); -- 2.52.0 From 35abc98ea4755eb5798697253f3d97de62eaf737 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:31 +0800 Subject: [PATCH 6/9] hdmi Signed-off-by: Eric Naim --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 27 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 204 +++++++--- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 49 ++- .../gpu/drm/amd/display/dc/core/dc_resource.c | 5 +- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_types.h | 7 +- drivers/gpu/drm/amd/display/dc/dm_helpers.h | 2 +- .../amd/display/include/ddc_service_types.h | 1 + .../amd/display/modules/freesync/freesync.c | 4 + .../amd/display/modules/inc/mod_info_packet.h | 18 +- .../display/modules/info_packet/info_packet.c | 348 ++++++++++++------ drivers/gpu/drm/amd/include/amd_shared.h | 6 + drivers/gpu/drm/drm_edid.c | 41 ++- include/drm/drm_connector.h | 47 +++ 15 files changed, 562 insertions(+), 200 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4dd9214873c1..59d5ecfc396b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -274,6 +274,8 @@ extern int amdgpu_rebar; extern int amdgpu_wbrf; extern int amdgpu_user_queue; +extern int amdgpu_allm_mode; +extern bool amdgpu_hdmi_vrr_desktop_mode; extern uint amdgpu_hdmi_hpd_debounce_delay_ms; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 69ad491ded1b..9630ebb37981 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -249,6 +249,8 @@ int amdgpu_umsch_mm_fwlog; int amdgpu_rebar = -1; /* auto */ int amdgpu_user_queue = -1; uint amdgpu_hdmi_hpd_debounce_delay_ms; +int amdgpu_allm_mode = 1; +bool amdgpu_hdmi_vrr_desktop_mode = true; 
DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1134,7 +1136,7 @@ module_param_named(rebar, amdgpu_rebar, int, 0444); MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); module_param_named(user_queue, amdgpu_user_queue, int, 0444); -/* +/** * DOC: hdmi_hpd_debounce_delay_ms (uint) * HDMI HPD disconnect debounce delay in milliseconds. * @@ -1144,6 +1146,29 @@ module_param_named(user_queue, amdgpu_user_queue, int, 0444); MODULE_PARM_DESC(hdmi_hpd_debounce_delay_ms, "HDMI HPD disconnect debounce delay in milliseconds (0 to disable (by default), 1500 is common)"); module_param_named(hdmi_hpd_debounce_delay_ms, amdgpu_hdmi_hpd_debounce_delay_ms, uint, 0644); +/** + * DOC: allm_mode (int) + * Changes ALLM triggering mode (if sink supports ALLM). Possible values: + * + * - 0 = ALLM disabled + * - 1 = ALLM dynamically triggered based on VRR state / Game Content Type Hint + * - 2 = ALLM forced always on + */ +MODULE_PARM_DESC(allm_mode, "Changes ALLM trigger mode (0 = disable, 1 = enable (default), 2 = force enable)"); +module_param_named(allm_mode, amdgpu_allm_mode, int, 0644); + +/** + * DOC: hdmi_vrr_desktop_mode (bool) + * Mimics FreeSync behavior by keeping HDMI VRR signalling active under + * fixed refresh rate conditions such as normal desktop work or web browsing. + * Possible values: + * + * - false = HDMI VRR is only enabled if refresh rate is truly variable + * - true = Mimics FreeSync behavior and keeps HDMI VRR always active + */ +MODULE_PARM_DESC(hdmi_vrr_desktop_mode, "Changes HDMI VRR desktop mode (false = disable, true = enable (default))"); +module_param_named(hdmi_vrr_desktop_mode, amdgpu_hdmi_vrr_desktop_mode, bool, 0644); + /* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6ecddd7211eb..a0fad8c73db7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2069,6 +2069,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_SKIP_DETECTION_LT) adev->dm.dc->debug.skip_detection_link_training = true; + if (amdgpu_dc_debug_mask & DC_OVERRIDE_PCON_VRR_ID_CHECK) + adev->dm.dc->debug.override_pcon_vrr_id_check = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ @@ -7370,7 +7373,7 @@ create_stream_for_sink(struct drm_connector *connector, update_stream_signal(stream, sink); if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) - mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket); + mod_build_hf_vsif_infopacket(stream, &stream->hfvsif_infopacket); if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || @@ -9605,7 +9608,8 @@ static void update_freesync_state_on_stream( aconn = (struct amdgpu_dm_connector *)new_stream->dm_stream_context; - if (aconn && (aconn->as_type == FREESYNC_TYPE_PCON_IN_WHITELIST || aconn->vsdb_info.replay_mode)) { + if (aconn && (aconn->as_type == ADAPTIVE_SYNC_TYPE_PCON_ALLOWED || + aconn->vsdb_info.replay_mode)) { pack_sdp_v1_3 = aconn->pack_sdp_v1_3; if (aconn->vsdb_info.amd_vsdb_version == 1) @@ -9619,6 +9623,9 @@ static void update_freesync_state_on_stream( &new_stream->adaptive_sync_infopacket); } + if (aconn && aconn->as_type == ADAPTIVE_SYNC_TYPE_HDMI) + packet_type = PACKET_TYPE_VTEM; + mod_freesync_build_vrr_infopacket( dm->freesync_module, new_stream, @@ -13107,8 +13114,8 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, } } -static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, - const struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) +static int parse_amd_vsdb_did(struct amdgpu_dm_connector *aconnector, + const struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) { u8 *edid_ext = NULL; int i; @@ -13124,6 +13131,9 @@ static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, break; } + if (i == edid->extensions) + return false; + while (j < EDID_LENGTH - sizeof(struct amd_vsdb_block)) { struct amd_vsdb_block *amd_vsdb = (struct amd_vsdb_block *)&edid_ext[j]; unsigned int ieeeId = (amd_vsdb->ieee_id[2] << 16) | (amd_vsdb->ieee_id[1] << 8) | (amd_vsdb->ieee_id[0]); @@ -13142,9 +13152,9 @@ static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, return false; } -static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, - const struct edid *edid, - struct amdgpu_hdmi_vsdb_info *vsdb_info) +static int parse_amd_vsdb_cea(struct amdgpu_dm_connector *aconnector, + const struct edid *edid, + struct amdgpu_hdmi_vsdb_info *vsdb_info) { u8 *edid_ext = NULL; int i; @@ -13174,6 +13184,82 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, return valid_vsdb_found ? 
i : -ENODEV; } +static bool is_monitor_range_invalid(struct drm_connector *conn) +{ + return conn->display_info.monitor_range.min_vfreq == 0 || + conn->display_info.monitor_range.max_vfreq == 0; +} + +/* + * Returns true if (max_vfreq - min_vfreq) > 10 + */ +static bool is_freesync_capable(struct drm_monitor_range_info *range) +{ + return (range->max_vfreq - range->min_vfreq) > 10; +} + +static void monitor_range_from_vsdb(struct drm_connector *conn, + struct amdgpu_hdmi_vsdb_info *vsdb) +{ + struct drm_monitor_range_info *range = &conn->display_info.monitor_range; + + range->min_vfreq = vsdb->min_refresh_rate_hz; + range->max_vfreq = vsdb->max_refresh_rate_hz; +} + +/** + * monitor_range_from_hdmi - Get VRR range from HDMI VRR info in EDID + * @conn: drm_connector with HDMI VRR info + * @vsdb: AMD vsdb from CEA. Can be NULL if not found. + * + * If vrr_max == 0, try getting the upper bound from the AMD vsdb (if passed). + */ +static void monitor_range_from_hdmi(struct drm_connector *conn, + const struct amdgpu_hdmi_vsdb_info *vsdb) +{ + struct drm_monitor_range_info *range = &conn->display_info.monitor_range; + struct drm_hdmi_vrr_cap *caps = &conn->display_info.hdmi.vrr_cap; + u16 vrr_max = caps->vrr_max; + + /* Try getting upper vrr bound from AMD vsdb */ + if (vrr_max == 0 && vsdb) + vrr_max = vsdb->max_refresh_rate_hz; + + /* Use max possible BRR value as a last resort */ + if (vrr_max == 0) + vrr_max = VTEM_BRR_MAX; + + range->min_vfreq = caps->vrr_min; + range->max_vfreq = vrr_max; +} + +/* + * Copies the monitor range into the amdgpu connector. Returns true if the + * connector is FreeSync capable. + */ +static bool copy_range_to_amdgpu_connector(struct drm_connector *conn) +{ + struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn); + struct drm_monitor_range_info *range = &conn->display_info.monitor_range; + + aconn->min_vfreq = range->min_vfreq; + aconn->max_vfreq = range->max_vfreq; + + return is_freesync_capable(range); +} + +/* + * Returns true if range from AMD vsdb is bigger + */ +static bool compare_ranges(struct drm_connector *conn, + struct amdgpu_hdmi_vsdb_info *vsdb) +{ + struct drm_monitor_range_info *range = &conn->display_info.monitor_range; + + return (vsdb->max_refresh_rate_hz - vsdb->min_refresh_rate_hz) > + (range->max_vfreq - range->min_vfreq); +} + /** * amdgpu_dm_update_freesync_caps - Update Freesync capabilities * @@ -13188,16 +13274,21 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, const struct drm_edid *drm_edid) { - int i = 0; struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); struct dm_connector_state *dm_con_state = NULL; struct dc_sink *sink; struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_hdmi_vsdb_info vsdb_info = {0}; + struct amdgpu_hdmi_vsdb_info vsdb_did = {0}; + struct dpcd_caps dpcd_caps = {0}; + struct drm_hdmi_vrr_cap *hdmi_vrr; const struct edid *edid; bool freesync_capable = false; - enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; + bool valid_vsdb_cea = false; + bool vsdb_freesync = false; + bool pcon_allowed = false; + bool is_pcon = false; if (!connector->state) { drm_err(adev_to_drm(adev), "%s - Connector has no state", __func__); @@ -13225,63 +13316,74 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (!adev->dm.freesync_module || !dc_supports_vrr(sink->ctx->dce_version)) goto update; + /* Gather all data */ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() +
valid_vsdb_cea = parse_amd_vsdb_cea(amdgpu_dm_connector, edid, &vsdb_info) >= 0; + vsdb_freesync = valid_vsdb_cea && vsdb_info.freesync_supported; + hdmi_vrr = &connector->display_info.hdmi.vrr_cap; + + if (amdgpu_dm_connector->dc_link) { + dpcd_caps = amdgpu_dm_connector->dc_link->dpcd_caps; + is_pcon = dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER; + pcon_allowed = dm_helpers_is_vrr_pcon_allowed( + amdgpu_dm_connector->dc_link, connector->dev); + } /* Some eDP panels only have the refresh rate range info in DisplayID */ - if ((connector->display_info.monitor_range.min_vfreq == 0 || - connector->display_info.monitor_range.max_vfreq == 0)) + if (is_monitor_range_invalid(connector)) parse_edid_displayid_vrr(connector, edid); - if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || - sink->sink_signal == SIGNAL_TYPE_EDP)) { - if (amdgpu_dm_connector->dc_link && - amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { - amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; - amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; - } + /* DP & eDP excluding PCONs */ + if ((sink->sink_signal == SIGNAL_TYPE_EDP || + sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) && !is_pcon) { + /* + * Many monitors expose AMD vsdb in CEA even for DP and their + * monitor ranges do not contain the Range Limits Only flag + */ + if (valid_vsdb_cea && is_monitor_range_invalid(connector)) + monitor_range_from_vsdb(connector, &vsdb_info); - parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); + /* Use bigger range if found in AMD vsdb */ + if (valid_vsdb_cea && compare_ranges(connector, &vsdb_info)) + monitor_range_from_vsdb(connector, &vsdb_info); - if (vsdb_info.replay_mode) { - amdgpu_dm_connector->vsdb_info.replay_mode = vsdb_info.replay_mode; - amdgpu_dm_connector->vsdb_info.amd_vsdb_version = vsdb_info.amd_vsdb_version; - amdgpu_dm_connector->as_type = ADAPTIVE_SYNC_TYPE_EDP; - } + if (dpcd_caps.allow_invalid_MSA_timing_param) + freesync_capable = copy_range_to_amdgpu_connector(connector); - } else if (drm_edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) { - i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); - if (i >= 0 && vsdb_info.freesync_supported) { - amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; - amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + /* eDP */ + if (edid) + parse_amd_vsdb_did(amdgpu_dm_connector, edid, &vsdb_did); - connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; - connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; + if (vsdb_did.replay_mode) { + amdgpu_dm_connector->vsdb_info.replay_mode = vsdb_did.replay_mode; + amdgpu_dm_connector->vsdb_info.amd_vsdb_version = vsdb_did.amd_vsdb_version; + amdgpu_dm_connector->as_type = ADAPTIVE_SYNC_TYPE_EDP; } - } - if (amdgpu_dm_connector->dc_link) - as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); + /* HDMI */ + } else if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) { + /* Prefer HDMI VRR */ + if (hdmi_vrr->supported) { + amdgpu_dm_connector->as_type = ADAPTIVE_SYNC_TYPE_HDMI; + monitor_range_from_hdmi(connector, valid_vsdb_cea ?
&vsdb_info : NULL); + } else if (vsdb_freesync) + monitor_range_from_vsdb(connector, &vsdb_info); - if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) { - i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); - if (i >= 0 && vsdb_info.freesync_supported && vsdb_info.amd_vsdb_version > 0) { + freesync_capable = copy_range_to_amdgpu_connector(connector); - amdgpu_dm_connector->pack_sdp_v1_3 = true; - amdgpu_dm_connector->as_type = as_type; + /* DP -> HDMI PCON */ + } else if (pcon_allowed) { + /* Prefer HDMI VRR */ + if (hdmi_vrr->supported) + monitor_range_from_hdmi(connector, valid_vsdb_cea ? &vsdb_info : NULL); + else if (vsdb_freesync) { amdgpu_dm_connector->vsdb_info = vsdb_info; - - amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; - amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; - - connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; - connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; + monitor_range_from_vsdb(connector, &vsdb_info); } + + amdgpu_dm_connector->pack_sdp_v1_3 = true; + amdgpu_dm_connector->as_type = ADAPTIVE_SYNC_TYPE_PCON_ALLOWED; + freesync_capable = copy_range_to_amdgpu_connector(connector); } update: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index e5e993d3ef74..6413f2a587d5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -137,7 +137,12 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->display_name, AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS); - edid_caps->edid_hdmi = connector->display_info.is_hdmi; + if (connector->display_info.is_hdmi) { + edid_caps->edid_hdmi = true; + edid_caps->allm = connector->display_info.hdmi.allm; + edid_caps->fva = connector->display_info.hdmi.vrr_cap.fva; + edid_caps->hdmi_vrr = connector->display_info.hdmi.vrr_cap.supported; + } if (edid_caps->edid_hdmi) populate_hdmi_info_from_connector(&connector->display_info.hdmi, edid_caps); @@ -1375,40 +1380,32 @@ void dm_helpers_dp_mst_update_branch_bandwidth( // TODO } -static bool dm_is_freesync_pcon_whitelist(const uint32_t branch_dev_id) +bool dm_helpers_is_vrr_pcon_allowed(const struct dc_link *link, const struct drm_device *dev) { - bool ret_val = false; + if (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER) + return false; - switch (branch_dev_id) { + if (!link->dpcd_caps.allow_invalid_MSA_timing_param) + return false; + + if (!link->dpcd_caps.adaptive_sync_caps.dp_adap_sync_caps.bits.ADAPTIVE_SYNC_SDP_SUPPORT) + return false; + + switch (link->dpcd_caps.branch_dev_id) { case DP_BRANCH_DEVICE_ID_0060AD: case DP_BRANCH_DEVICE_ID_00E04C: case DP_BRANCH_DEVICE_ID_90CC24: - ret_val = true; - break; - default: - break; + case DP_BRANCH_DEVICE_ID_2B02F0: + return true; } - return ret_val; -} - -enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link) -{ - struct dpcd_caps *dpcd_caps = &link->dpcd_caps; - enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; - - switch (dpcd_caps->dongle_type) { - case DISPLAY_DONGLE_DP_HDMI_CONVERTER: - if (dpcd_caps->adaptive_sync_caps.dp_adap_sync_caps.bits.ADAPTIVE_SYNC_SDP_SUPPORT == true && - dpcd_caps->allow_invalid_MSA_timing_param == true && - dm_is_freesync_pcon_whitelist(dpcd_caps->branch_dev_id)) - as_type = 
FREESYNC_TYPE_PCON_IN_WHITELIST; - break; - default: - break; + if (link->dc->debug.override_pcon_vrr_id_check) { + drm_info(dev, "Overriding VRR PCON check for ID: 0x%06x\n", + link->dpcd_caps.branch_dev_id); + return true; } - return as_type; + return false; } bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 848c267ef11e..b779aac28dfa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -44,6 +44,7 @@ #include "clk_mgr.h" #include "dc_state_priv.h" #include "dc_stream_priv.h" +#include "modules/inc/mod_info_packet.h" #include "virtual/virtual_link_hwss.h" #include "link/hwss/link_hwss_dio.h" @@ -4503,7 +4504,6 @@ static void set_avi_info_frame( unsigned int vic = pipe_ctx->stream->timing.vic; unsigned int rid = pipe_ctx->stream->timing.rid; unsigned int fr_ind = pipe_ctx->stream->timing.fr_index; - enum dc_timing_3d_format format; if (stream->avi_infopacket.valid) { *info_packet = stream->avi_infopacket; @@ -4657,9 +4657,8 @@ static void set_avi_info_frame( ///VIC if (pipe_ctx->stream->timing.hdmi_vic != 0) vic = 0; - format = stream->timing.timing_3d_format; /*todo, add 3DStereo support*/ - if (format != TIMING_3D_FORMAT_NONE) { + if (!is_hdmi_vic_mode(pipe_ctx->stream)) { // Based on HDMI specs hdmi vic needs to be converted to cea vic when 3D is enabled switch (pipe_ctx->stream->timing.hdmi_vic) { case 1: diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 29edfa51ea2c..f24ece2eafc5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1039,6 +1039,7 @@ struct dc_debug_options { bool scl_reset_length10; bool hdmi20_disable; bool skip_detection_link_training; + bool override_pcon_vrr_id_check; uint32_t edid_read_retry_times; unsigned int force_odm_combine; //bit vector based on otg inst unsigned int seamless_boot_odm_combine; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index f46039f64203..e9281e022a7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -210,9 +210,14 @@ struct dc_edid_caps { uint32_t max_tmds_clk_mhz; - /*HDMI 2.0 caps*/ + /* HDMI 2.0 caps */ bool lte_340mcsc_scramble; + /* HDMI 2.1 caps */ + bool allm; + bool fva; + bool hdmi_vrr; + bool edid_hdmi; bool hdr_supported; bool rr_capable; diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 9d160b39e8c5..ea94c52d2b87 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -219,10 +219,10 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, const struct dc_link *link, struct set_config_cmd_payload *payload, enum set_config_status *operation_result); -enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link); enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid); +bool dm_helpers_is_vrr_pcon_allowed(const struct dc_link *link, const struct drm_device *dev); bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream); bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream); diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h 
b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 1c603b12957f..e838f7c1269c 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -36,6 +36,7 @@ #define DP_BRANCH_DEVICE_ID_006037 0x006037 #define DP_BRANCH_DEVICE_ID_001CF8 0x001CF8 #define DP_BRANCH_DEVICE_ID_0060AD 0x0060AD +#define DP_BRANCH_DEVICE_ID_2B02F0 0x2B02F0 /* Chrontel CH7218 */ #define DP_BRANCH_HW_REV_10 0x10 #define DP_BRANCH_HW_REV_20 0x20 diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 1aae46d703ba..db197cf048e1 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -27,6 +27,7 @@ #include "dc.h" #include "mod_freesync.h" #include "core_types.h" +#include "mod_info_packet.h" #define MOD_FREESYNC_MAX_CONCURRENT_STREAMS 32 @@ -955,6 +956,9 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, return; switch (packet_type) { + case PACKET_TYPE_VTEM: + mod_build_vtem_infopacket(stream, vrr, infopacket); + break; case PACKET_TYPE_FS_V3: build_vrr_infopacket_v3(stream->signal, vrr, app_tf, infopacket, stream->freesync_on_desktop); break; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 66dc9a19aebe..6383966d01c5 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -33,6 +33,9 @@ struct dc_stream_state; struct dc_info_packet; struct mod_vrr_params; +#define VTEM_BRR_MAX 1023 + +bool is_hdmi_vic_mode(const struct dc_stream_state *stream); void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, enum dc_color_space cs, @@ -41,12 +44,17 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); +void mod_build_vtem_infopacket(const struct dc_stream_state *stream, + const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket); + enum adaptive_sync_type { - ADAPTIVE_SYNC_TYPE_NONE = 0, - ADAPTIVE_SYNC_TYPE_DP = 1, - FREESYNC_TYPE_PCON_IN_WHITELIST = 2, - FREESYNC_TYPE_PCON_NOT_IN_WHITELIST = 3, - ADAPTIVE_SYNC_TYPE_EDP = 4, + ADAPTIVE_SYNC_TYPE_NONE = 0, + ADAPTIVE_SYNC_TYPE_DP = 1, + ADAPTIVE_SYNC_TYPE_PCON_ALLOWED = 2, + ADAPTIVE_SYNC_TYPE_PCON_NOT_ALLOWED = 3, + ADAPTIVE_SYNC_TYPE_EDP = 4, + ADAPTIVE_SYNC_TYPE_HDMI = 5, }; enum adaptive_sync_sdp_version { diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index b3d55cac3569..cf5a1ccb87f4 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -23,12 +23,13 @@ * */ -#include "mod_info_packet.h" +#include "amdgpu.h" #include "core_types.h" +#include "dc.h" #include "dc_types.h" -#include "mod_shared.h" #include "mod_freesync.h" -#include "dc.h" +#include "mod_info_packet.h" +#include "mod_shared.h" enum vsc_packet_revision { vsc_packet_undefined = 0, @@ -44,76 +45,64 @@ enum vsc_packet_revision { vsc_packet_rev5 = 5, }; +#define HDMI_INFOFRAME_TYPE_EMP 0x7F #define HDMI_INFOFRAME_TYPE_VENDOR 0x81 -#define HF_VSIF_VERSION 1 - -// VTEM Byte Offset -#define VTEM_PB0 0 -#define VTEM_PB1 1 
-#define VTEM_PB2 2 -#define VTEM_PB3 3 -#define VTEM_PB4 4 -#define VTEM_PB5 5 -#define VTEM_PB6 6 - -#define VTEM_MD0 7 -#define VTEM_MD1 8 -#define VTEM_MD2 9 -#define VTEM_MD3 10 - - -// VTEM Byte Masks -//PB0 -#define MASK_VTEM_PB0__RESERVED0 0x01 -#define MASK_VTEM_PB0__SYNC 0x02 -#define MASK_VTEM_PB0__VFR 0x04 -#define MASK_VTEM_PB0__AFR 0x08 -#define MASK_VTEM_PB0__DS_TYPE 0x30 - //0: Periodic pseudo-static EM Data Set - //1: Periodic dynamic EM Data Set - //2: Unique EM Data Set - //3: Reserved -#define MASK_VTEM_PB0__END 0x40 -#define MASK_VTEM_PB0__NEW 0x80 - -//PB1 -#define MASK_VTEM_PB1__RESERVED1 0xFF - -//PB2 -#define MASK_VTEM_PB2__ORGANIZATION_ID 0xFF - //0: This is a Vendor Specific EM Data Set - //1: This EM Data Set is defined by This Specification (HDMI 2.1 r102.clean) - //2: This EM Data Set is defined by CTA-861-G - //3: This EM Data Set is defined by VESA -//PB3 -#define MASK_VTEM_PB3__DATA_SET_TAG_MSB 0xFF -//PB4 -#define MASK_VTEM_PB4__DATA_SET_TAG_LSB 0xFF -//PB5 -#define MASK_VTEM_PB5__DATA_SET_LENGTH_MSB 0xFF -//PB6 -#define MASK_VTEM_PB6__DATA_SET_LENGTH_LSB 0xFF - - - -//PB7-27 (20 bytes): -//PB7 = MD0 -#define MASK_VTEM_MD0__VRR_EN 0x01 -#define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__QMS_EN 0x04 -#define MASK_VTEM_MD0__RESERVED2 0x08 -#define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 - -//MD1 -#define MASK_VTEM_MD1__BASE_VFRONT 0xFF - -//MD2 -#define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 -#define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__NEXT_TFR 0xF8 - -//MD3 -#define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF +#define HDMI_INFOFRAME_LENGTH_MASK 0x1F +#define HF_VSIF_VERSION 1 +#define HF_VSIF_3D_BIT 0 +#define HF_VSIF_ALLM_BIT 1 + +enum allm_trigger_mode { + ALLM_MODE_DISABLED = 0, + ALLM_MODE_ENABLED_DYNAMIC = 1, + ALLM_MODE_ENABLED_FORCED = 2, +}; + +#define VTEM_ORG_ID 1 +#define VTEM_DATA_SET_TAG 1 +#define VTEM_DATA_SET_LENGTH 4 + +#define VTEM_M_CONST 0 +#define VTEM_FVA_FACTOR 0 + +#define VTEM_BRR_MASK_UPPER 0x03 +#define VTEM_BRR_MASK_LOWER 0xFF + +/* VTEM Byte Offset */ +#define VTEM_PB0 0 +#define VTEM_PB1 1 +#define VTEM_PB2 2 +#define VTEM_PB3 3 +#define VTEM_PB4 4 +#define VTEM_PB5 5 +#define VTEM_PB6 6 + +#define VTEM_MD0 7 +#define VTEM_MD1 8 +#define VTEM_MD2 9 +#define VTEM_MD3 10 + +/* Extended Metadata Packet */ +/* Header */ +#define EMP_LAST_BIT 6 +#define EMP_FIRST_BIT 7 +/* PB0 */ +#define EMP_SNC_BIT 1 +#define EMP_VFR_BIT 2 +#define EMP_AFR_BIT 3 +#define EMP_DST_BIT 4 +#define EMP_END_BIT 6 +#define EMP_NEW_BIT 7 +/* PB7 = MD0 */ +#define VTEM_VRR_BIT 0 +#define VTEM_M_CONST_BIT 1 +#define VTEM_FVA_BIT 4 +/* MD1 Base_Vfront */ +/* MD2 */ +#define VTEM_BRR_UPPER_BIT 0 +#define VTEM_RB_BIT 2 +/* MD3 BRR Lower */ + enum ColorimetryRGBDP { ColorimetryRGB_DP_sRGB = 0, @@ -441,9 +430,49 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } } +static bool is_hdmi_allm_mode(const struct dc_stream_state *stream) +{ + /* Sink doesn't expose ALLM support in edid */ + if (!stream->link->local_sink->edid_caps.allm) + return false; + + switch (amdgpu_allm_mode) { + case ALLM_MODE_DISABLED: + return false; + + case ALLM_MODE_ENABLED_DYNAMIC: + break; + + case ALLM_MODE_ENABLED_FORCED: + return true; + } + + return stream->content_type == DISPLAY_CONTENT_TYPE_GAME || + stream->vrr_active_variable; +} + +bool is_hdmi_vic_mode(const struct dc_stream_state *stream) +{ + bool allm = is_hdmi_allm_mode(stream); + bool stereo = stream->view_format != VIEW_3D_FORMAT_NONE; + + if (stream->timing.hdmi_vic == 0) + return 
false; + + if (stream->timing.h_total < 3840 || + stream->timing.v_total < 2160) + return false; + + if (stereo || allm) + return false; + + return true; +} + /** * mod_build_hf_vsif_infopacket - Prepare HDMI Vendor Specific info frame. * Follows HDMI Spec to build up Vendor Specific info frame + * Conforms to h14b-vsif or hf-vsif based on the capabilities * * @stream: contains data we may need to construct VSIF (i.e. timing_3d_format, etc.) * @info_packet: output structure where to store VSIF @@ -451,63 +480,76 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet) { - unsigned int length = 5; bool hdmi_vic_mode = false; + bool allm = false; + bool stereo = false; uint8_t checksum = 0; - uint32_t i = 0; + uint8_t offset = 0; + uint8_t i = 0; + uint8_t length = 5; + uint32_t oui = HDMI_IEEE_OUI; enum dc_timing_3d_format format; info_packet->valid = false; - format = stream->timing.timing_3d_format; - if (stream->view_format == VIEW_3D_FORMAT_NONE) - format = TIMING_3D_FORMAT_NONE; - if (stream->timing.hdmi_vic != 0 - && stream->timing.h_total >= 3840 - && stream->timing.v_total >= 2160 - && format == TIMING_3D_FORMAT_NONE) - hdmi_vic_mode = true; + allm = is_hdmi_allm_mode(stream); + format = stream->view_format == VIEW_3D_FORMAT_NONE ? + TIMING_3D_FORMAT_NONE : + stream->timing.timing_3d_format; + stereo = format != TIMING_3D_FORMAT_NONE; + hdmi_vic_mode = is_hdmi_vic_mode(stream); - if ((format == TIMING_3D_FORMAT_NONE) && !hdmi_vic_mode) + if (!stereo && !hdmi_vic_mode && !allm) return; - info_packet->sb[1] = 0x03; - info_packet->sb[2] = 0x0C; - info_packet->sb[3] = 0x00; + if (allm) + oui = HDMI_FORUM_IEEE_OUI; - if (format != TIMING_3D_FORMAT_NONE) - info_packet->sb[4] = (2 << 5); + info_packet->sb[1] = oui & 0xff; + info_packet->sb[2] = (oui >> 8) & 0xff; + info_packet->sb[3] = (oui >> 16) & 0xff; - else if (hdmi_vic_mode) - info_packet->sb[4] = (1 << 5); + if (oui == HDMI_FORUM_IEEE_OUI) { + offset = 2; + length += 2; + info_packet->sb[4] = HF_VSIF_VERSION; + info_packet->sb[5] = stereo << HF_VSIF_3D_BIT; + info_packet->sb[5] |= allm << HF_VSIF_ALLM_BIT; + } - switch (format) { - case TIMING_3D_FORMAT_HW_FRAME_PACKING: - case TIMING_3D_FORMAT_SW_FRAME_PACKING: - info_packet->sb[5] = (0x0 << 4); - break; + if (stereo) { + info_packet->sb[4 + offset] = (2 << 5); - case TIMING_3D_FORMAT_SIDE_BY_SIDE: - case TIMING_3D_FORMAT_SBS_SW_PACKED: - info_packet->sb[5] = (0x8 << 4); - length = 6; - break; + switch (format) { + case TIMING_3D_FORMAT_HW_FRAME_PACKING: + case TIMING_3D_FORMAT_SW_FRAME_PACKING: + info_packet->sb[5 + offset] = (0x0 << 4); + break; - case TIMING_3D_FORMAT_TOP_AND_BOTTOM: - case TIMING_3D_FORMAT_TB_SW_PACKED: - info_packet->sb[5] = (0x6 << 4); - break; + case TIMING_3D_FORMAT_SIDE_BY_SIDE: + case TIMING_3D_FORMAT_SBS_SW_PACKED: + info_packet->sb[5 + offset] = (0x8 << 4); + ++length; + break; - default: - break; - } + case TIMING_3D_FORMAT_TOP_AND_BOTTOM: + case TIMING_3D_FORMAT_TB_SW_PACKED: + info_packet->sb[5 + offset] = (0x6 << 4); + break; - if (hdmi_vic_mode) + default: + break; + } + + /* Doesn't need the offset as it can't be used with hf-vsif */ + } else if (hdmi_vic_mode) { + info_packet->sb[4] = (1 << 5); info_packet->sb[5] = stream->timing.hdmi_vic; + } info_packet->hb0 = HDMI_INFOFRAME_TYPE_VENDOR; info_packet->hb1 = 0x01; - info_packet->hb2 = (uint8_t) (length); + info_packet->hb2 = length & HDMI_INFOFRAME_LENGTH_MASK; checksum += 
info_packet->hb0; checksum += info_packet->hb1; @@ -521,6 +563,89 @@ void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, info_packet->valid = true; } +static void build_vtem_infopacket_header(struct dc_info_packet *infopacket) +{ + uint8_t pb0 = 0; + + /* might need logic in the future */ + pb0 |= 0 << EMP_SNC_BIT; + pb0 |= 1 << EMP_VFR_BIT; + pb0 |= 0 << EMP_AFR_BIT; + pb0 |= 0 << EMP_DST_BIT; + pb0 |= 0 << EMP_END_BIT; + pb0 |= 1 << EMP_NEW_BIT; + + infopacket->hb0 = HDMI_INFOFRAME_TYPE_EMP; + infopacket->hb1 = (1 << EMP_FIRST_BIT) | (1 << EMP_LAST_BIT); + infopacket->hb2 = 0; // sequence + + infopacket->sb[VTEM_PB0] = pb0; + infopacket->sb[VTEM_PB2] = VTEM_ORG_ID; + infopacket->sb[VTEM_PB4] = VTEM_DATA_SET_TAG; + infopacket->sb[VTEM_PB6] = VTEM_DATA_SET_LENGTH; +} + +static void build_vtem_infopacket_data(const struct dc_stream_state *stream, + const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket) +{ + unsigned int hblank = 0; + unsigned int brr = 0; + bool hdmi_vic_mode = false; + bool vrr_active = false; + bool rb = false; + + hdmi_vic_mode = is_hdmi_vic_mode(stream); + + if (amdgpu_hdmi_vrr_desktop_mode) { + vrr_active = vrr->state != VRR_STATE_UNSUPPORTED && + vrr->state != VRR_STATE_DISABLED; + } else { + vrr_active = vrr->state == VRR_STATE_ACTIVE_VARIABLE || + vrr->state == VRR_STATE_ACTIVE_FIXED; + } + + infopacket->sb[VTEM_MD0] = VTEM_M_CONST << VTEM_M_CONST_BIT; + infopacket->sb[VTEM_MD0] |= VTEM_FVA_FACTOR << VTEM_FVA_BIT; + infopacket->sb[VTEM_MD0] |= vrr_active << VTEM_VRR_BIT; + + infopacket->sb[VTEM_MD1] = 0; + infopacket->sb[VTEM_MD2] = 0; + infopacket->sb[VTEM_MD3] = 0; + + if (hdmi_vic_mode || !vrr_active) + return; + /* + * In accordance with CVT 1.2 and CVT 2.1: + * Reduced Blanking standard defines a fixed value of + * 160 for hblank, further reduced to 80 in RB2. RB3 uses + * fixed hblank of 80 pixels + up to 120 additional pixels + * in 8-pixel steps. 
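+	 * As a worked example (illustrative numbers, not from this patch): a mode
+	 * with h_total = 2000 and h_addressable = 1920 gives hblank = 80, which is
+	 * inside 80..200 and a multiple of 8, so the RB flag is set below.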
+ */ + hblank = stream->timing.h_total - stream->timing.h_addressable; + rb = (hblank >= 80 && hblank <= 200 && hblank % 8 == 0); + brr = mod_freesync_calc_nominal_field_rate(stream) / 1000000; + + if (brr > VTEM_BRR_MAX) { + infopacket->valid = false; + return; + } + + infopacket->sb[VTEM_MD1] = (uint8_t) stream->timing.v_front_porch; + infopacket->sb[VTEM_MD2] = rb << VTEM_RB_BIT; + infopacket->sb[VTEM_MD2] |= (brr >> 8) & VTEM_BRR_MASK_UPPER; + infopacket->sb[VTEM_MD3] = brr & VTEM_BRR_MASK_LOWER; +} + +void mod_build_vtem_infopacket(const struct dc_stream_state *stream, + const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket) +{ + infopacket->valid = true; + build_vtem_infopacket_header(infopacket); + build_vtem_infopacket_data(stream, vrr, infopacket); +} + void mod_build_adaptive_sync_infopacket(const struct dc_stream_state *stream, enum adaptive_sync_type asType, const struct AS_Df_params *param, @@ -535,12 +660,13 @@ void mod_build_adaptive_sync_infopacket(const struct dc_stream_state *stream, if (stream != NULL) mod_build_adaptive_sync_infopacket_v2(stream, param, info_packet); break; - case FREESYNC_TYPE_PCON_IN_WHITELIST: + case ADAPTIVE_SYNC_TYPE_PCON_ALLOWED: case ADAPTIVE_SYNC_TYPE_EDP: mod_build_adaptive_sync_infopacket_v1(info_packet); break; case ADAPTIVE_SYNC_TYPE_NONE: - case FREESYNC_TYPE_PCON_NOT_IN_WHITELIST: + case ADAPTIVE_SYNC_TYPE_PCON_NOT_ALLOWED: + case ADAPTIVE_SYNC_TYPE_HDMI: default: break; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 17945094a138..839cfe2eeabe 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -411,6 +411,12 @@ enum DC_DEBUG_MASK { * @DC_SKIP_DETECTION_LT: (0x200000) If set, skip detection link training */ DC_SKIP_DETECTION_LT = 0x200000, + + /** + * @DC_OVERRIDE_PCON_VRR_ID_CHECK: (0x400000) If set, always return true when checking for + * PCON VRR compatibility and print its ID in the kernel log. 
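+	 * Debug masks are normally supplied through the amdgpu.dcdebugmask module
+	 * parameter, so this flag corresponds to amdgpu.dcdebugmask=0x400000 (usage
+	 * note; the parameter name is an assumption, not part of this hunk).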
+ */ + DC_OVERRIDE_PCON_VRR_ID_CHECK = 0x400000, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 26bb7710a462..056eff8cbd1a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6152,6 +6152,33 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, hdmi->y420_dc_modes = dc_mask; } +static void drm_parse_hdmi_gaming_info(struct drm_hdmi_info *hdmi, const u8 *db) +{ + struct drm_hdmi_vrr_cap *vrr = &hdmi->vrr_cap; + + if (cea_db_payload_len(db) < 8) + return; + + hdmi->fapa_start_location = db[8] & DRM_EDID_FAPA_START_LOCATION; + hdmi->allm = db[8] & DRM_EDID_ALLM; + vrr->fva = db[8] & DRM_EDID_FVA; + vrr->cnmvrr = db[8] & DRM_EDID_CNMVRR; + vrr->cinema_vrr = db[8] & DRM_EDID_CINEMA_VRR; + vrr->mdelta = db[8] & DRM_EDID_MDELTA; + + if (cea_db_payload_len(db) < 9) + return; + + vrr->vrr_min = db[9] & DRM_EDID_VRR_MIN_MASK; + vrr->supported = (vrr->vrr_min > 0 && vrr->vrr_min <= 48); + + if (cea_db_payload_len(db) < 10) + return; + + vrr->vrr_max = (db[9] & DRM_EDID_VRR_MAX_UPPER_MASK) << 2 | db[10]; + vrr->supported &= (vrr->vrr_max == 0 || vrr->vrr_max >= 100); +} + static void drm_parse_dsc_info(struct drm_hdmi_dsc_cap *hdmi_dsc, const u8 *hf_scds) { @@ -6277,7 +6304,7 @@ static void drm_parse_hdmi_forum_scds(struct drm_connector *connector, } drm_parse_ycbcr420_deep_color_info(connector, hf_scds); - + drm_parse_hdmi_gaming_info(&connector->display_info.hdmi, hf_scds); if (cea_db_payload_len(hf_scds) >= 11 && hf_scds[11]) { drm_parse_dsc_info(hdmi_dsc, hf_scds); dsc_support = true; @@ -6287,6 +6314,18 @@ static void drm_parse_hdmi_forum_scds(struct drm_connector *connector, "[CONNECTOR:%d:%s] HF-VSDB: max TMDS clock: %d KHz, HDMI 2.1 support: %s, DSC 1.2 support: %s\n", connector->base.id, connector->name, max_tmds_clock, str_yes_no(max_frl_rate), str_yes_no(dsc_support)); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] FAPA in blanking: %s, ALLM support: %s, Fast Vactive support: %s\n", + connector->base.id, connector->name, str_yes_no(hdmi->fapa_start_location), + str_yes_no(hdmi->allm), str_yes_no(hdmi->vrr_cap.fva)); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Negative M VRR support: %s, CinemaVRR support: %s, Mdelta: %d\n", + connector->base.id, connector->name, str_yes_no(hdmi->vrr_cap.cnmvrr), + str_yes_no(hdmi->vrr_cap.cinema_vrr), hdmi->vrr_cap.mdelta); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] VRRmin: %u, VRRmax: %u, VRR supported: %s\n", + connector->base.id, connector->name, hdmi->vrr_cap.vrr_min, + hdmi->vrr_cap.vrr_max, str_yes_no(hdmi->vrr_cap.supported)); } static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 8f34f4b8183d..dab9d5521f41 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -254,6 +254,44 @@ struct drm_scdc { struct drm_scrambling scrambling; }; +/** + * struct drm_hdmi_vrr_cap - Information about VRR capabilities of an HDMI sink + * + * Describes the VRR support provided by an HDMI 2.1 sink. The information is + * fetched from the additional HF-VSDB fields defined for HDMI 2.1. 
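+ *
+ * The fields mirror HF-VSDB payload bytes 8-10, as parsed by
+ * drm_parse_hdmi_gaming_info() in drm_edid.c above.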
+ */ +struct drm_hdmi_vrr_cap { + /** @fva: flag for Fast VActive (Quick Frame Transport) support */ + bool fva; + + /** @cnmvrr: flag for Negative M VRR support */ + bool cnmvrr; + + /** @cinema_vrr: flag for Cinema VRR support */ + bool cinema_vrr; + + /** @mdelta: flag for limited frame-to-frame compensation support */ + bool mdelta; + + /** + * @vrr_min: minimum supported variable refresh rate in Hz. + * Valid values lie only inside the 1 - 48 range + */ + u16 vrr_min; + + /** + * @vrr_max: maximum supported variable refresh rate in Hz (optional). + * Valid values are either 0 (max based on video mode) or >= 100 + */ + u16 vrr_max; + + /** + * @supported: flag for VRR support, set when VRRmin and VRRmax + * carry valid values. + */ + bool supported; +}; + /** * struct drm_hdmi_dsc_cap - DSC capabilities of HDMI sink * @@ -330,6 +368,15 @@ struct drm_hdmi_info { /** @max_lanes: supported by sink */ u8 max_lanes; + /** @fapa_start_location: flag for the FAPA in blanking support */ + bool fapa_start_location; + + /** @allm: flag for Auto Low Latency Mode support by sink */ + bool allm; + + /** @vrr_cap: VRR capabilities of the sink */ + struct drm_hdmi_vrr_cap vrr_cap; + /** @dsc_cap: DSC capabilities of the sink */ struct drm_hdmi_dsc_cap dsc_cap; }; -- 2.52.0 From 171f49c4384a04a4e3bdf3e708f16aa70a174c19 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:32 +0800 Subject: [PATCH 7/9] t2 Signed-off-by: Eric Naim --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 + drivers/gpu/drm/i915/display/intel_ddi.c | 3 + drivers/gpu/drm/i915/display/intel_fbdev.c | 6 +- drivers/gpu/drm/i915/display/intel_quirks.c | 15 + drivers/gpu/drm/i915/display/intel_quirks.h | 1 + drivers/gpu/vga/vga_switcheroo.c | 7 +- drivers/hid/Kconfig | 10 +- drivers/hid/Makefile | 4 + drivers/hid/dockchannel-hid/Kconfig | 14 + drivers/hid/dockchannel-hid/Makefile | 6 + drivers/hid/dockchannel-hid/dockchannel-hid.c | 1213 +++++++++++++++++ drivers/hid/hid-apple.c | 45 +- drivers/hid/hid-core.c | 11 +- drivers/hid/hid-ids.h | 10 + drivers/hid/hid-magicmouse.c | 770 ++++++++++- drivers/hid/spi-hid/Kconfig | 26 + drivers/hid/spi-hid/Makefile | 10 + drivers/hid/spi-hid/spi-hid-apple-core.c | 1194 ++++++++++++++++ drivers/hid/spi-hid/spi-hid-apple-of.c | 153 +++ drivers/hid/spi-hid/spi-hid-apple.h | 35 + drivers/hwmon/applesmc.c | 1138 ++++++++++++---- drivers/pci/vgaarb.c | 1 + drivers/platform/x86/apple-gmux.c | 18 + drivers/soc/apple/Kconfig | 24 + drivers/soc/apple/Makefile | 6 + drivers/soc/apple/dockchannel.c | 406 ++++++ drivers/soc/apple/rtkit-helper.c | 151 ++ drivers/staging/Kconfig | 2 + drivers/staging/Makefile | 1 + drivers/staging/apple-bce/Kconfig | 18 + drivers/staging/apple-bce/Makefile | 28 + drivers/staging/apple-bce/apple_bce.c | 445 ++++++ drivers/staging/apple-bce/apple_bce.h | 41 + drivers/staging/apple-bce/audio/audio.c | 711 ++++++++++ drivers/staging/apple-bce/audio/audio.h | 125 ++ drivers/staging/apple-bce/audio/description.h | 42 + drivers/staging/apple-bce/audio/pcm.c | 308 +++++ drivers/staging/apple-bce/audio/pcm.h | 16 + drivers/staging/apple-bce/audio/protocol.c | 347 +++++ drivers/staging/apple-bce/audio/protocol.h | 147 ++ .../staging/apple-bce/audio/protocol_bce.c | 226 +++ .../staging/apple-bce/audio/protocol_bce.h | 72 + drivers/staging/apple-bce/mailbox.c | 155 +++ drivers/staging/apple-bce/mailbox.h | 53 + drivers/staging/apple-bce/queue.c | 415 ++++++ drivers/staging/apple-bce/queue.h | 177 +++ drivers/staging/apple-bce/queue_dma.c | 220 +++ 
drivers/staging/apple-bce/queue_dma.h | 50 + drivers/staging/apple-bce/vhci/command.h | 204 +++ drivers/staging/apple-bce/vhci/queue.c | 268 ++++ drivers/staging/apple-bce/vhci/queue.h | 76 ++ drivers/staging/apple-bce/vhci/transfer.c | 673 +++++++++ drivers/staging/apple-bce/vhci/transfer.h | 75 + drivers/staging/apple-bce/vhci/vhci.c | 763 +++++++++++ drivers/staging/apple-bce/vhci/vhci.h | 52 + include/linux/hid.h | 6 +- include/linux/soc/apple/dockchannel.h | 26 + 57 files changed, 10671 insertions(+), 351 deletions(-) create mode 100644 drivers/hid/dockchannel-hid/Kconfig create mode 100644 drivers/hid/dockchannel-hid/Makefile create mode 100644 drivers/hid/dockchannel-hid/dockchannel-hid.c create mode 100644 drivers/hid/spi-hid/Kconfig create mode 100644 drivers/hid/spi-hid/Makefile create mode 100644 drivers/hid/spi-hid/spi-hid-apple-core.c create mode 100644 drivers/hid/spi-hid/spi-hid-apple-of.c create mode 100644 drivers/hid/spi-hid/spi-hid-apple.h create mode 100644 drivers/soc/apple/dockchannel.c create mode 100644 drivers/soc/apple/rtkit-helper.c create mode 100644 drivers/staging/apple-bce/Kconfig create mode 100644 drivers/staging/apple-bce/Makefile create mode 100644 drivers/staging/apple-bce/apple_bce.c create mode 100644 drivers/staging/apple-bce/apple_bce.h create mode 100644 drivers/staging/apple-bce/audio/audio.c create mode 100644 drivers/staging/apple-bce/audio/audio.h create mode 100644 drivers/staging/apple-bce/audio/description.h create mode 100644 drivers/staging/apple-bce/audio/pcm.c create mode 100644 drivers/staging/apple-bce/audio/pcm.h create mode 100644 drivers/staging/apple-bce/audio/protocol.c create mode 100644 drivers/staging/apple-bce/audio/protocol.h create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.c create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.h create mode 100644 drivers/staging/apple-bce/mailbox.c create mode 100644 drivers/staging/apple-bce/mailbox.h create mode 100644 drivers/staging/apple-bce/queue.c create mode 100644 drivers/staging/apple-bce/queue.h create mode 100644 drivers/staging/apple-bce/queue_dma.c create mode 100644 drivers/staging/apple-bce/queue_dma.h create mode 100644 drivers/staging/apple-bce/vhci/command.h create mode 100644 drivers/staging/apple-bce/vhci/queue.c create mode 100644 drivers/staging/apple-bce/vhci/queue.h create mode 100644 drivers/staging/apple-bce/vhci/transfer.c create mode 100644 drivers/staging/apple-bce/vhci/transfer.h create mode 100644 drivers/staging/apple-bce/vhci/vhci.c create mode 100644 drivers/staging/apple-bce/vhci/vhci.h create mode 100644 include/linux/soc/apple/dockchannel.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9630ebb37981..6736b86c9bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2428,6 +2428,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0, i; bool supports_atomic = false; + if (vga_switcheroo_client_probe_defer(pdev)) + return -EPROBE_DEFER; + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { if (drm_firmware_drivers_only() && amdgpu_modeset == -1) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 002ccd47856d..c5d2e908f4a7 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4890,6 +4890,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port 
*dig_port) if (dig_port->ddi_a_4_lanes) return false; + if (intel_has_quirk(display, QUIRK_DDI_A_FORCE_4_LANES)) + return true; + /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only * supported configuration */ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 44f4fcce526e..695eadc88eab 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -278,10 +278,10 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, ifbdev->fb = NULL; if (fb && - (sizes->fb_width > fb->base.width || - sizes->fb_height > fb->base.height)) { + (sizes->fb_width != fb->base.width || + sizes->fb_height != fb->base.height)) { drm_dbg_kms(display->drm, - "BIOS fb too small (%dx%d), we require (%dx%d)," + "BIOS fb not valid (%dx%d), we require (%dx%d)," " releasing it\n", fb->base.width, fb->base.height, sizes->fb_width, sizes->fb_height); diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index d2e16b79d6be..85f26517771a 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -66,6 +66,18 @@ static void quirk_increase_ddi_disabled_time(struct intel_display *display) drm_info(display->drm, "Applying Increase DDI Disabled quirk\n"); } +/* + * In some cases, the firmware might not set the lane count to 4 (for example, + * when booting some dual-GPU Macs with the dGPU as the default GPU). This + * quirk forces the lane count, since otherwise it might not be possible to + * compute a valid link configuration. + */ +static void quirk_ddi_a_force_4_lanes(struct intel_display *display) +{ + intel_set_quirk(display, QUIRK_DDI_A_FORCE_4_LANES); + drm_info(display->drm, "Applying DDI A Forced 4 Lanes quirk\n"); +} + static void quirk_no_pps_backlight_power_hook(struct intel_display *display) { intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); @@ -240,6 +252,9 @@ static struct intel_quirk intel_quirks[] = { /* Dell XPS 13 7390 2-in-1 */ { 0x8a12, 0x1028, 0x08b0, quirk_edp_limit_rate_hbr2 }, + + /* Apple MacBookPro15,1 */ + { 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes }, }; static const struct intel_dpcd_quirk intel_dpcd_quirks[] = { diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h index 06da0e286c67..71b89e52ebe7 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.h +++ b/drivers/gpu/drm/i915/display/intel_quirks.h @@ -21,6 +21,7 @@ enum intel_quirk_id { QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK, QUIRK_FW_SYNC_LEN, QUIRK_EDP_LIMIT_RATE_HBR2, + QUIRK_DDI_A_FORCE_4_LANES, }; void intel_init_quirks(struct intel_display *display); diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 68e45a26e85f..7580172daade 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -438,12 +438,7 @@ find_active_client(struct list_head *head) bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev) { if (pci_is_display(pdev)) { - /* - * apple-gmux is needed on pre-retina MacBook Pro - * to probe the panel if pdev is the inactive GPU. 
- */ - if (apple_gmux_present() && pdev != vga_default_device() && - !vgasr_priv.handler_flags) + if (apple_gmux_present() && !vgasr_priv.handler_flags) return true; } diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 920a64b66b25..ecb3059c68fb 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -140,7 +140,7 @@ config HID_APPLE tristate "Apple {i,Power,Mac}Books" depends on LEDS_CLASS depends on NEW_LEDS - default !EXPERT + default !EXPERT || SPI_HID_APPLE help Support for some Apple devices which less or more break HID specification. @@ -727,11 +727,13 @@ config LOGIWHEELS_FF config HID_MAGICMOUSE tristate "Apple Magic Mouse/Trackpad multi-touch support" + default SPI_HID_APPLE help Support for the Apple Magic Mouse/Trackpad multi-touch. Say Y here if you want support for the multi-touch features of the - Apple Wireless "Magic" Mouse and the Apple Wireless "Magic" Trackpad. + Apple Wireless "Magic" Mouse, the Apple Wireless "Magic" Trackpad and + Force Touch trackpads in MacBooks starting from 2015. config HID_MALTRON tristate "Maltron L90 keyboard" @@ -1448,4 +1450,8 @@ endif # HID source "drivers/hid/usbhid/Kconfig" +source "drivers/hid/spi-hid/Kconfig" + +source "drivers/hid/dockchannel-hid/Kconfig" + endif # HID_SUPPORT diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 361a7daedeb8..397b6cf5b6c3 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -173,6 +173,10 @@ obj-$(CONFIG_INTEL_ISH_HID) += intel-ish-hid/ obj-$(CONFIG_AMD_SFH_HID) += amd-sfh-hid/ +obj-$(CONFIG_SPI_HID_APPLE_CORE) += spi-hid/ + +obj-$(CONFIG_HID_DOCKCHANNEL) += dockchannel-hid/ + obj-$(CONFIG_SURFACE_HID_CORE) += surface-hid/ obj-$(CONFIG_INTEL_THC_HID) += intel-thc-hid/ diff --git a/drivers/hid/dockchannel-hid/Kconfig b/drivers/hid/dockchannel-hid/Kconfig new file mode 100644 index 000000000000..8a81d551a83d --- /dev/null +++ b/drivers/hid/dockchannel-hid/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +menu "DockChannel HID support" + depends on APPLE_DOCKCHANNEL + +config HID_DOCKCHANNEL + tristate "HID over DockChannel transport layer for Apple Silicon SoCs" + default ARCH_APPLE + depends on APPLE_DOCKCHANNEL && INPUT && OF && HID + help + Say Y here if you use an M2 or later Apple Silicon based laptop. + The keyboard and touchpad are HID based devices connected via the + proprietary DockChannel interface. 
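+	  To compile this driver as a module, choose M here: the module
+	  will be called dockchannel-hid.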
+ +endmenu diff --git a/drivers/hid/dockchannel-hid/Makefile b/drivers/hid/dockchannel-hid/Makefile new file mode 100644 index 000000000000..7dba766b047f --- /dev/null +++ b/drivers/hid/dockchannel-hid/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# +# Makefile for DockChannel HID transport drivers +# + +obj-$(CONFIG_HID_DOCKCHANNEL) += dockchannel-hid.o diff --git a/drivers/hid/dockchannel-hid/dockchannel-hid.c b/drivers/hid/dockchannel-hid/dockchannel-hid.c new file mode 100644 index 000000000000..a712a724ded3 --- /dev/null +++ b/drivers/hid/dockchannel-hid/dockchannel-hid.c @@ -0,0 +1,1213 @@ +/* + * SPDX-License-Identifier: GPL-2.0 OR MIT + * + * Apple DockChannel HID transport driver + * + * Copyright The Asahi Linux Contributors + */ +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/firmware.h> +#include <linux/gpio/consumer.h> +#include <linux/hid.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/soc/apple/dockchannel.h> +#include <linux/unaligned.h> +#include <linux/workqueue.h> +#include "../hid-ids.h" + +#define COMMAND_TIMEOUT_MS 1000 +#define START_TIMEOUT_MS 2000 + +#define MAX_INTERFACES 16 + +/* Data + checksum */ +#define MAX_PKT_SIZE (0xffff + 4) + +#define DCHID_CHANNEL_CMD 0x11 +#define DCHID_CHANNEL_REPORT 0x12 + +struct dchid_hdr { + u8 hdr_len; + u8 channel; + u16 length; + u8 seq; + u8 iface; + u16 pad; +} __packed; + +#define IFACE_COMM 0 + +#define FLAGS_GROUP GENMASK(7, 6) +#define FLAGS_REQ GENMASK(5, 0) + +#define REQ_SET_REPORT 0 +#define REQ_GET_REPORT 1 + +struct dchid_subhdr { + u8 flags; + u8 unk; + u16 length; + u32 retcode; +} __packed; + +#define EVENT_GPIO_CMD 0xa0 +#define EVENT_INIT 0xf0 +#define EVENT_READY 0xf1 + +struct dchid_init_hdr { + u8 type; + u8 unk1; + u8 unk2; + u8 iface; + char name[16]; + u8 more_packets; + u8 unkpad; +} __packed; + +#define INIT_HID_DESCRIPTOR 0 +#define INIT_GPIO_REQUEST 1 +#define INIT_TERMINATOR 2 +#define INIT_PRODUCT_NAME 7 + +#define CMD_RESET_INTERFACE 0x40 +#define CMD_SEND_FIRMWARE 0x95 +#define CMD_ENABLE_INTERFACE 0xb4 +#define CMD_ACK_GPIO_CMD 0xa1 + +struct dchid_init_block_hdr { + u16 type; + u16 length; +} __packed; + +#define MAX_GPIO_NAME 32 + +struct dchid_gpio_request { + u16 unk; + u16 id; + char name[MAX_GPIO_NAME]; +} __packed; + +struct dchid_gpio_cmd { + u8 type; + u8 iface; + u8 gpio; + u8 unk; + u8 cmd; +} __packed; + +struct dchid_gpio_ack { + u8 type; + u32 retcode; + u8 cmd[]; +} __packed; + +#define STM_REPORT_ID 0x10 +#define STM_REPORT_SERIAL 0x11 +#define STM_REPORT_KEYBTYPE 0x14 + +struct dchid_stm_id { + u8 unk; + u16 vendor_id; + u16 product_id; + u16 version_number; + u8 unk2; + u8 unk3; + u8 keyboard_type; + u8 serial_length; + /* Serial follows, but we grab it with a different report. 
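(It is fetched separately via STM_REPORT_SERIAL in dchid_handle_ready().)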
*/ +} __packed; + +#define FW_MAGIC 0x46444948 +#define FW_VER 1 + +struct fw_header { + u32 magic; + u32 version; + u32 hdr_length; + u32 data_length; + u32 iface_offset; +} __packed; + +struct dchid_work { + struct work_struct work; + struct dchid_iface *iface; + + struct dchid_hdr hdr; + u8 data[]; +}; + +struct dchid_iface { + struct dockchannel_hid *dchid; + struct hid_device *hid; + struct workqueue_struct *wq; + + bool creating; + struct work_struct create_work; + + int index; + const char *name; + const struct device_node *of_node; + + uint8_t tx_seq; + bool deferred; + bool starting; + bool open; + struct completion ready; + + void *hid_desc; + size_t hid_desc_len; + + struct gpio_desc *gpio; + char gpio_name[MAX_GPIO_NAME]; + int gpio_id; + + struct mutex out_mutex; + u32 out_flags; + int out_report; + u32 retcode; + void *resp_buf; + size_t resp_size; + struct completion out_complete; + + u32 keyboard_layout_id; +}; + +struct dockchannel_hid { + struct device *dev; + struct dockchannel *dc; + struct device_link *helper_link; + + bool id_ready; + struct dchid_stm_id device_id; + char serial[64]; + + struct dchid_iface *comm; + struct dchid_iface *ifaces[MAX_INTERFACES]; + + u8 pkt_buf[MAX_PKT_SIZE]; + + /* Workqueue to asynchronously create HID devices */ + struct workqueue_struct *new_iface_wq; +}; + +static ssize_t apple_layout_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hid_device *hdev = to_hid_device(dev); + struct dchid_iface *iface = hdev->driver_data; + + return scnprintf(buf, PAGE_SIZE, "%d\n", iface->keyboard_layout_id); +} + +static DEVICE_ATTR_RO(apple_layout_id); + +static struct dchid_iface * +dchid_get_interface(struct dockchannel_hid *dchid, int index, const char *name) +{ + struct dchid_iface *iface; + + if (index >= MAX_INTERFACES) { + dev_err(dchid->dev, "Interface index %d out of range\n", index); + return NULL; + } + + if (dchid->ifaces[index]) + return dchid->ifaces[index]; + + iface = devm_kzalloc(dchid->dev, sizeof(struct dchid_iface), GFP_KERNEL); + if (!iface) + return NULL; + + iface->index = index; + iface->name = devm_kstrdup(dchid->dev, name, GFP_KERNEL); + iface->dchid = dchid; + iface->out_report= -1; + init_completion(&iface->out_complete); + init_completion(&iface->ready); + mutex_init(&iface->out_mutex); + iface->wq = alloc_ordered_workqueue("dchid-%s", WQ_MEM_RECLAIM, iface->name); + if (!iface->wq) + return NULL; + + /* Comm is not a HID subdevice */ + if (!strcmp(name, "comm")) { + dchid->ifaces[index] = iface; + return iface; + } + + iface->of_node = of_get_child_by_name(dchid->dev->of_node, name); + if (!iface->of_node) { + dev_warn(dchid->dev, "No OF node for subdevice %s, ignoring.", name); + return NULL; + } + + dchid->ifaces[index] = iface; + return iface; +} + +static u32 dchid_checksum(void *p, size_t length) +{ + u32 sum = 0; + + while (length >= 4) { + sum += get_unaligned_le32(p); + p += 4; + length -= 4; + } + + WARN_ON_ONCE(length); + return sum; +} + +static int dchid_send(struct dchid_iface *iface, u32 flags, void *msg, size_t size) +{ + u32 checksum = 0xffffffff; + size_t wsize = round_down(size, 4); + size_t tsize = size - wsize; + int ret; + struct { + struct dchid_hdr hdr; + struct dchid_subhdr sub; + } __packed h; + + memset(&h, 0, sizeof(h)); + h.hdr.hdr_len = sizeof(h.hdr); + h.hdr.channel = DCHID_CHANNEL_CMD; + h.hdr.length = round_up(size, 4) + sizeof(h.sub); + h.hdr.seq = iface->tx_seq; + h.hdr.iface = iface->index; + h.sub.flags = flags; + h.sub.length = size; + + ret = 
dockchannel_send(iface->dchid->dc, &h, sizeof(h)); + if (ret < 0) + return ret; + checksum -= dchid_checksum(&h, sizeof(h)); + + ret = dockchannel_send(iface->dchid->dc, msg, wsize); + if (ret < 0) + return ret; + checksum -= dchid_checksum(msg, wsize); + + if (tsize) { + u8 tail[4] = {0, 0, 0, 0}; + + memcpy(tail, msg + wsize, tsize); + ret = dockchannel_send(iface->dchid->dc, tail, sizeof(tail)); + if (ret < 0) + return ret; + checksum -= dchid_checksum(tail, sizeof(tail)); + } + + ret = dockchannel_send(iface->dchid->dc, &checksum, sizeof(checksum)); + if (ret < 0) + return ret; + + return 0; +} + +static int dchid_cmd(struct dchid_iface *iface, u32 type, u32 req, + void *data, size_t size, void *resp_buf, size_t resp_size) +{ + int ret; + int report_id = *(u8*)data; + + mutex_lock(&iface->out_mutex); + + WARN_ON(iface->out_report != -1); + iface->out_report = report_id; + iface->out_flags = FIELD_PREP(FLAGS_GROUP, type) | FIELD_PREP(FLAGS_REQ, req); + iface->resp_buf = resp_buf; + iface->resp_size = resp_size; + reinit_completion(&iface->out_complete); + + ret = dchid_send(iface, iface->out_flags, data, size); + if (ret < 0) + goto done; + + if (!wait_for_completion_timeout(&iface->out_complete, msecs_to_jiffies(COMMAND_TIMEOUT_MS))) { + dev_err(iface->dchid->dev, "output report 0x%x to iface %d (%s) timed out\n", + report_id, iface->index, iface->name); + ret = -ETIMEDOUT; + goto done; + } + + ret = iface->resp_size; + if (iface->retcode) { + dev_err(iface->dchid->dev, + "output report 0x%x to iface %d (%s) failed with err 0x%x\n", + report_id, iface->index, iface->name, iface->retcode); + ret = -EIO; + } + +done: + iface->tx_seq++; + iface->out_report = -1; + iface->out_flags = 0; + iface->resp_buf = NULL; + iface->resp_size = 0; + mutex_unlock(&iface->out_mutex); + return ret; +} + +static int dchid_comm_cmd(struct dockchannel_hid *dchid, void *cmd, size_t size) +{ + return dchid_cmd(dchid->comm, HID_FEATURE_REPORT, REQ_SET_REPORT, cmd, size, NULL, 0); +} + +static int dchid_enable_interface(struct dchid_iface *iface) +{ + u8 msg[] = { CMD_ENABLE_INTERFACE, iface->index }; + + return dchid_comm_cmd(iface->dchid, msg, sizeof(msg)); +} + +static int dchid_reset_interface(struct dchid_iface *iface, int state) +{ + u8 msg[] = { CMD_RESET_INTERFACE, 1, iface->index, state }; + + return dchid_comm_cmd(iface->dchid, msg, sizeof(msg)); +} + +static int dchid_send_firmware(struct dchid_iface *iface, void *firmware, size_t size) +{ + struct { + u8 cmd; + u8 unk1; + u8 unk2; + u8 iface; + u64 addr; + u32 size; + } __packed msg = { + .cmd = CMD_SEND_FIRMWARE, + .unk1 = 2, + .unk2 = 0, + .iface = iface->index, + .size = size, + }; + dma_addr_t addr; + void *buf = dmam_alloc_coherent(iface->dchid->dev, size, &addr, GFP_KERNEL); + + if (IS_ERR_OR_NULL(buf)) + return buf ? 
PTR_ERR(buf) : -ENOMEM; + + msg.addr = addr; + memcpy(buf, firmware, size); + wmb(); + + return dchid_comm_cmd(iface->dchid, &msg, sizeof(msg)); +} + +static int dchid_get_firmware(struct dchid_iface *iface, void **firmware, size_t *size) +{ + int ret; + const char *fw_name; + const struct firmware *fw; + struct fw_header *hdr; + u8 *fw_data; + + ret = of_property_read_string(iface->of_node, "firmware-name", &fw_name); + if (ret) { + /* Firmware is only for some devices */ + *firmware = NULL; + *size = 0; + return 0; + } + + ret = request_firmware(&fw, fw_name, iface->dchid->dev); + if (ret) + return ret; + + hdr = (struct fw_header *)fw->data; + + if (hdr->magic != FW_MAGIC || hdr->version != FW_VER || + hdr->hdr_length < sizeof(*hdr) || hdr->hdr_length > fw->size || + (hdr->hdr_length + (size_t)hdr->data_length) > fw->size || + hdr->iface_offset >= hdr->data_length) { + dev_warn(iface->dchid->dev, "%s: invalid firmware header\n", + fw_name); + ret = -EINVAL; + goto done; + } + + fw_data = devm_kmemdup(iface->dchid->dev, fw->data + hdr->hdr_length, + hdr->data_length, GFP_KERNEL); + if (!fw_data) { + ret = -ENOMEM; + goto done; + } + + if (hdr->iface_offset) + fw_data[hdr->iface_offset] = iface->index; + + *firmware = fw_data; + *size = hdr->data_length; + +done: + release_firmware(fw); + return ret; +} + +static int dchid_request_gpio(struct dchid_iface *iface) +{ + char prop_name[MAX_GPIO_NAME + 16]; + + if (iface->gpio) + return 0; + + dev_info(iface->dchid->dev, "Requesting GPIO %s#%d: %s\n", + iface->name, iface->gpio_id, iface->gpio_name); + + snprintf(prop_name, sizeof(prop_name), "apple,%s", iface->gpio_name); + + iface->gpio = devm_gpiod_get_index(iface->dchid->dev, prop_name, 0, GPIOD_OUT_LOW); + + if (IS_ERR_OR_NULL(iface->gpio)) { + dev_err(iface->dchid->dev, "Failed to request GPIO %s-gpios\n", prop_name); + iface->gpio = NULL; + return -1; + } + + return 0; +} + +static int dchid_start_interface(struct dchid_iface *iface) +{ + void *fw; + size_t size; + int ret; + + if (iface->starting) { + dev_warn(iface->dchid->dev, "Interface %s is already starting", iface->name); + return -EINPROGRESS; + } + + dev_info(iface->dchid->dev, "Starting interface %s\n", iface->name); + + iface->starting = true; + + /* Look to see if we need firmware */ + ret = dchid_get_firmware(iface, &fw, &size); + if (ret < 0) + goto err; + + /* If we need a GPIO, make sure we have it. 
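The GPIO name and ID arrived earlier in an INIT_GPIO_REQUEST init packet, so the request happens lazily here rather than at probe time.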
*/ + if (iface->gpio_id) { + ret = dchid_request_gpio(iface); + if (ret < 0) + goto err; + } + + /* Only multi-touch has firmware */ + if (fw && size) { + /* Send firmware to the device */ + dev_info(iface->dchid->dev, "Sending firmware for %s\n", iface->name); + ret = dchid_send_firmware(iface, fw, size); + if (ret < 0) { + dev_err(iface->dchid->dev, "Failed to send %s firmware\n", iface->name); + goto err; + } + + /* After loading firmware, multi-touch needs a reset */ + dev_info(iface->dchid->dev, "Resetting %s\n", iface->name); + dchid_reset_interface(iface, 0); + dchid_reset_interface(iface, 2); + } + + return 0; + +err: + iface->starting = false; + return ret; +} + +static int dchid_start(struct hid_device *hdev) +{ + struct dchid_iface *iface = hdev->driver_data; + + if (iface->keyboard_layout_id) { + int ret = device_create_file(&hdev->dev, &dev_attr_apple_layout_id); + if (ret) { + dev_warn(iface->dchid->dev, "Failed to create apple_layout_id: %d\n", ret); + iface->keyboard_layout_id = 0; + } + } + + return 0; +} + +static void dchid_stop(struct hid_device *hdev) +{ + struct dchid_iface *iface = hdev->driver_data; + + if (iface->keyboard_layout_id) + device_remove_file(&hdev->dev, &dev_attr_apple_layout_id); +} + +static int dchid_open(struct hid_device *hdev) +{ + struct dchid_iface *iface = hdev->driver_data; + int ret; + + if (!completion_done(&iface->ready)) { + ret = dchid_start_interface(iface); + if (ret < 0) + return ret; + + if (!wait_for_completion_timeout(&iface->ready, msecs_to_jiffies(START_TIMEOUT_MS))) { + dev_err(iface->dchid->dev, "iface %s start timed out\n", iface->name); + return -ETIMEDOUT; + } + } + + iface->open = true; + return 0; +} + +static void dchid_close(struct hid_device *hdev) +{ + struct dchid_iface *iface = hdev->driver_data; + + iface->open = false; +} + +static int dchid_parse(struct hid_device *hdev) +{ + struct dchid_iface *iface = hdev->driver_data; + + return hid_parse_report(hdev, iface->hid_desc, iface->hid_desc_len); +} + +/* Note: buf excludes report number! For ease of fetching strings/etc. */ +static int dchid_get_report_cmd(struct dchid_iface *iface, u8 reportnum, void *buf, size_t len) +{ + int ret = dchid_cmd(iface, HID_FEATURE_REPORT, REQ_GET_REPORT, &reportnum, 1, buf, len); + + return ret <= 0 ? ret : ret - 1; +} + +/* Note: buf includes report number! 
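(Unlike dchid_get_report_cmd() above, which takes the report number separately and strips it from the returned length.)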
*/ +static int dchid_set_report(struct dchid_iface *iface, void *buf, size_t len) +{ + return dchid_cmd(iface, HID_OUTPUT_REPORT, REQ_SET_REPORT, buf, len, NULL, 0); +} + +static int dchid_raw_request(struct hid_device *hdev, + unsigned char reportnum, __u8 *buf, size_t len, + unsigned char rtype, int reqtype) +{ + struct dchid_iface *iface = hdev->driver_data; + + switch (reqtype) { + case HID_REQ_GET_REPORT: + buf[0] = reportnum; + return dchid_cmd(iface, rtype, REQ_GET_REPORT, &reportnum, 1, buf + 1, len - 1); + case HID_REQ_SET_REPORT: + return dchid_set_report(iface, buf, len); + default: + return -EIO; + } + + return 0; +} + +static struct hid_ll_driver dchid_ll = { + .start = &dchid_start, + .stop = &dchid_stop, + .open = &dchid_open, + .close = &dchid_close, + .parse = &dchid_parse, + .raw_request = &dchid_raw_request, +}; + +static void dchid_create_interface_work(struct work_struct *ws) +{ + struct dchid_iface *iface = container_of(ws, struct dchid_iface, create_work); + struct dockchannel_hid *dchid = iface->dchid; + struct hid_device *hid; + int ret; + + if (iface->hid) { + dev_warn(dchid->dev, "Interface %s already created!\n", + iface->name); + return; + } + + dev_info(dchid->dev, "New interface %s\n", iface->name); + + /* Start the interface. This is not the entire init process, as firmware is loaded later on device open. */ + ret = dchid_enable_interface(iface); + if (ret < 0) { + dev_warn(dchid->dev, "Failed to enable %s: %d\n", iface->name, ret); + return; + } + + iface->deferred = false; + + hid = hid_allocate_device(); + if (IS_ERR(hid)) + return; + + snprintf(hid->name, sizeof(hid->name), "Apple MTP %s", iface->name); + snprintf(hid->phys, sizeof(hid->phys), "%s.%d (%s)", + dev_name(dchid->dev), iface->index, iface->name); + strscpy(hid->uniq, dchid->serial, sizeof(hid->uniq)); + + hid->ll_driver = &dchid_ll; + hid->bus = BUS_HOST; + hid->vendor = dchid->device_id.vendor_id; + hid->product = dchid->device_id.product_id; + hid->version = dchid->device_id.version_number; + hid->type = HID_TYPE_OTHER; + if (!strcmp(iface->name, "multi-touch")) { + hid->type = HID_TYPE_SPI_MOUSE; + } else if (!strcmp(iface->name, "keyboard")) { + u32 country_code = 0; + + hid->type = HID_TYPE_SPI_KEYBOARD; + + /* + * We have to get the country code from the device tree, since the + * device provides no reliable way to get this info. 
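+	 * A node might carry, e.g., hid-country-code = <13>; (illustrative
+	 * value; only the property names below are taken from the code).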
+ */ + if (!of_property_read_u32(iface->of_node, "hid-country-code", &country_code)) + hid->country = country_code; + + of_property_read_u32(iface->of_node, "apple,keyboard-layout-id", + &iface->keyboard_layout_id); + } + + hid->dev.parent = iface->dchid->dev; + hid->driver_data = iface; + + iface->hid = hid; + + ret = hid_add_device(hid); + if (ret < 0) { + iface->hid = NULL; + hid_destroy_device(hid); + dev_warn(iface->dchid->dev, "Failed to register hid device %s\n", iface->name); + } +} + +static int dchid_create_interface(struct dchid_iface *iface) +{ + if (iface->creating) + return -EBUSY; + + iface->creating = true; + INIT_WORK(&iface->create_work, dchid_create_interface_work); + return queue_work(iface->dchid->new_iface_wq, &iface->create_work); +} + +static void dchid_handle_descriptor(struct dchid_iface *iface, void *hid_desc, size_t desc_len) +{ + if (iface->hid) { + dev_warn(iface->dchid->dev, "Tried to initialize already started interface %s!\n", + iface->name); + return; + } + + iface->hid_desc = devm_kmemdup(iface->dchid->dev, hid_desc, desc_len, GFP_KERNEL); + if (!iface->hid_desc) + return; + + iface->hid_desc_len = desc_len; +} + +static void dchid_handle_ready(struct dockchannel_hid *dchid, void *data, size_t length) +{ + struct dchid_iface *iface; + u8 *pkt = data; + u8 index; + int i, ret; + + if (length < 2) { + dev_err(dchid->dev, "Bad length for ready message: %zu\n", length); + return; + } + + index = pkt[1]; + + if (index >= MAX_INTERFACES) { + dev_err(dchid->dev, "Got ready notification for bad iface %d\n", index); + return; + } + + iface = dchid->ifaces[index]; + if (!iface) { + dev_err(dchid->dev, "Got ready notification for unknown iface %d\n", index); + return; + } + + dev_info(dchid->dev, "Interface %s is now ready\n", iface->name); + complete_all(&iface->ready); + + /* When STM is ready, grab global device info */ + if (!strcmp(iface->name, "stm")) { + ret = dchid_get_report_cmd(iface, STM_REPORT_ID, &dchid->device_id, + sizeof(dchid->device_id)); + if (ret < sizeof(dchid->device_id)) { + dev_warn(iface->dchid->dev, "Failed to get device ID from STM!\n"); + /* Fake it and keep going. Things might still work... 
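The vendor ID falls back to Apple's host ID below, so hid-apple can still match the child devices.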
*/ + memset(&dchid->device_id, 0, sizeof(dchid->device_id)); + dchid->device_id.vendor_id = HOST_VENDOR_ID_APPLE; + } + ret = dchid_get_report_cmd(iface, STM_REPORT_SERIAL, dchid->serial, + sizeof(dchid->serial) - 1); + if (ret < 0) { + dev_warn(iface->dchid->dev, "Failed to get serial from STM!\n"); + dchid->serial[0] = 0; + } + + dchid->id_ready = true; + for (i = 0; i < MAX_INTERFACES; i++) { + if (!dchid->ifaces[i] || !dchid->ifaces[i]->deferred) + continue; + dchid_create_interface(dchid->ifaces[i]); + } + } +} + +static void dchid_handle_init(struct dockchannel_hid *dchid, void *data, size_t length) +{ + struct dchid_init_hdr *hdr = data; + struct dchid_iface *iface; + struct dchid_init_block_hdr *blk; + + if (length < sizeof(*hdr)) + return; + + iface = dchid_get_interface(dchid, hdr->iface, hdr->name); + if (!iface) + return; + + data += sizeof(*hdr); + length -= sizeof(*hdr); + + while (length >= sizeof(*blk)) { + blk = data; + data += sizeof(*blk); + length -= sizeof(*blk); + + if (blk->length > length) + break; + + switch (blk->type) { + case INIT_HID_DESCRIPTOR: + dchid_handle_descriptor(iface, data, blk->length); + break; + + case INIT_GPIO_REQUEST: { + struct dchid_gpio_request *req = data; + + if (sizeof(*req) > length) + break; + + if (iface->gpio_id) { + dev_err(dchid->dev, + "Cannot request more than one GPIO per interface!\n"); + break; + } + + strscpy(iface->gpio_name, req->name, MAX_GPIO_NAME); + iface->gpio_id = req->id; + break; + } + + case INIT_TERMINATOR: + break; + + case INIT_PRODUCT_NAME: { + char *product = data; + + if (product[blk->length - 1] != 0) { + dev_warn(dchid->dev, "Unterminated product name for %s\n", + iface->name); + } else { + dev_info(dchid->dev, "Product name for %s: %s\n", + iface->name, product); + } + break; + } + + default: + dev_warn(dchid->dev, "Unknown init packet %d for %s\n", + blk->type, iface->name); + break; + } + + data += blk->length; + length -= blk->length; + + if (blk->type == INIT_TERMINATOR) + break; + } + + if (hdr->more_packets) + return; + + /* We need to enable STM first, since it'll give us the device IDs */ + if (iface->dchid->id_ready || !strcmp(iface->name, "stm")) { + dchid_create_interface(iface); + } else { + iface->deferred = true; + } +} + +static void dchid_handle_gpio(struct dockchannel_hid *dchid, void *data, size_t length) +{ + struct dchid_gpio_cmd *cmd = data; + struct dchid_iface *iface; + u32 retcode = 0xe000f00d; /* Give it a random Apple-style error code */ + struct dchid_gpio_ack *ack; + + if (length < sizeof(*cmd)) + return; + + if (cmd->iface >= MAX_INTERFACES || !(iface = dchid->ifaces[cmd->iface])) { + dev_err(dchid->dev, "Got GPIO command for bad interface %d\n", cmd->iface); + goto err; + } + + if (dchid_request_gpio(iface) < 0) + goto err; + + if (!iface->gpio || cmd->gpio != iface->gpio_id) { + dev_err(dchid->dev, "Got GPIO command for bad GPIO %s#%d\n", + iface->name, cmd->gpio); + goto err; + } + + dev_info(dchid->dev, "GPIO command: %s#%d: %d\n", iface->name, cmd->gpio, cmd->cmd); + + switch (cmd->cmd) { + case 3: + /* Pulse. */ + gpiod_set_value_cansleep(iface->gpio, 1); + msleep(10); /* Random guess... 
*/ + gpiod_set_value_cansleep(iface->gpio, 0); + retcode = 0; + break; + default: + dev_err(dchid->dev, "Unknown GPIO command %d\n", cmd->cmd); + break; + } + +err: + /* Ack it */ + ack = kzalloc(sizeof(*ack) + length, GFP_KERNEL); + if (!ack) + return; + + ack->type = CMD_ACK_GPIO_CMD; + ack->retcode = retcode; + memcpy(ack->cmd, data, length); + + if (dchid_comm_cmd(dchid, ack, sizeof(*ack) + length) < 0) + dev_err(dchid->dev, "Failed to ACK GPIO command\n"); + + kfree(ack); +} + +static void dchid_handle_event(struct dockchannel_hid *dchid, void *data, size_t length) +{ + u8 *p = data; + + switch (*p) { + case EVENT_INIT: + dchid_handle_init(dchid, data, length); + break; + case EVENT_READY: + dchid_handle_ready(dchid, data, length); + break; + case EVENT_GPIO_CMD: + dchid_handle_gpio(dchid, data, length); + break; + } +} + +static void dchid_handle_report(struct dchid_iface *iface, void *data, size_t length) +{ + struct dockchannel_hid *dchid = iface->dchid; + + if (!iface->hid) { + dev_warn(dchid->dev, "Report received but %s is not initialized!\n", iface->name); + return; + } + + if (!iface->open) + return; + + hid_input_report(iface->hid, HID_INPUT_REPORT, data, length, 1); +} + +static void dchid_packet_work(struct work_struct *ws) +{ + struct dchid_work *work = container_of(ws, struct dchid_work, work); + struct dchid_subhdr *shdr = (void *)work->data; + struct dockchannel_hid *dchid = work->iface->dchid; + int type = FIELD_GET(FLAGS_GROUP, shdr->flags); + u8 *payload = work->data + sizeof(*shdr); + + if (shdr->length + sizeof(*shdr) > work->hdr.length) { + dev_err(dchid->dev, "Bad sub header length (%d > %zu)\n", + shdr->length, work->hdr.length - sizeof(*shdr)); + return; + } + + switch (type) { + case HID_INPUT_REPORT: + if (work->hdr.iface == IFACE_COMM) + dchid_handle_event(dchid, payload, shdr->length); + else + dchid_handle_report(work->iface, payload, shdr->length); + break; + default: + dev_err(dchid->dev, "Received unknown packet type %d\n", type); + break; + } + + kfree(work); +} + +static void dchid_handle_ack(struct dchid_iface *iface, struct dchid_hdr *hdr, void *data) +{ + struct dchid_subhdr *shdr = (void *)data; + u8 *payload = data + sizeof(*shdr); + + if (shdr->length + sizeof(*shdr) > hdr->length) { + dev_err(iface->dchid->dev, "Bad sub header length (%d > %zu)\n", + shdr->length, hdr->length - sizeof(*shdr)); + return; + } + if (shdr->flags != iface->out_flags) { + dev_err(iface->dchid->dev, + "Received unexpected flags 0x%x on ACK channel (expected 0x%x)\n", + shdr->flags, iface->out_flags); + return; + } + + if (shdr->length < 1) { + dev_err(iface->dchid->dev, "Received length 0 output report ack\n"); + return; + } + if (iface->tx_seq != hdr->seq) { + dev_err(iface->dchid->dev, "Received ACK with bad seq (expected %d, got %d)\n", + iface->tx_seq, hdr->seq); + return; + } + if (iface->out_report != payload[0]) { + dev_err(iface->dchid->dev, "Received ACK with bad report (expected %d, got %d)\n", + iface->out_report, payload[0]); + return; + } + + if (iface->resp_buf && iface->resp_size) + memcpy(iface->resp_buf, payload + 1, min((size_t)shdr->length - 1, iface->resp_size)); + + iface->resp_size = shdr->length; + iface->out_report = -1; + iface->retcode = shdr->retcode; + complete(&iface->out_complete); +} + +static void dchid_handle_packet(void *cookie, size_t avail) +{ + struct dockchannel_hid *dchid = cookie; + struct dchid_hdr hdr; + struct dchid_work *work; + struct dchid_iface *iface; + u32 checksum; + + if (dockchannel_recv(dchid->dc, &hdr, sizeof(hdr)) 
!= sizeof(hdr)) { + dev_err(dchid->dev, "Read failed (header)\n"); + return; + } + + if (hdr.hdr_len != sizeof(hdr)) { + dev_err(dchid->dev, "Bad header length %d\n", hdr.hdr_len); + goto done; + } + + if (dockchannel_recv(dchid->dc, dchid->pkt_buf, hdr.length + 4) != (hdr.length + 4)) { + dev_err(dchid->dev, "Read failed (body)\n"); + goto done; + } + + checksum = dchid_checksum(&hdr, sizeof(hdr)); + checksum += dchid_checksum(dchid->pkt_buf, hdr.length + 4); + + if (checksum != 0xffffffff) { + dev_err(dchid->dev, "Checksum mismatch (iface %d): 0x%08x != 0xffffffff\n", + hdr.iface, checksum); + goto done; + } + + if (hdr.iface >= MAX_INTERFACES) { + dev_err(dchid->dev, "Bad iface %d\n", hdr.iface); + goto done; + } + + iface = dchid->ifaces[hdr.iface]; + + if (!iface) { + dev_err(dchid->dev, "Received packet for uninitialized iface %d\n", hdr.iface); + goto done; + } + + switch (hdr.channel) { + case DCHID_CHANNEL_CMD: + dchid_handle_ack(iface, &hdr, dchid->pkt_buf); + goto done; + case DCHID_CHANNEL_REPORT: + break; + default: + dev_warn(dchid->dev, "Unknown channel 0x%x, treating as report...\n", + hdr.channel); + break; + } + + work = kzalloc(sizeof(*work) + hdr.length, GFP_KERNEL); + if (!work) + return; + + work->hdr = hdr; + work->iface = iface; + memcpy(work->data, dchid->pkt_buf, hdr.length); + INIT_WORK(&work->work, dchid_packet_work); + + queue_work(iface->wq, &work->work); + +done: + dockchannel_await(dchid->dc, dchid_handle_packet, dchid, sizeof(struct dchid_hdr)); +} + +static int dockchannel_hid_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dockchannel_hid *dchid; + struct device_node *child, *helper; + struct platform_device *helper_pdev; + struct property *prop; + int ret; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return ret; + + dchid = devm_kzalloc(dev, sizeof(*dchid), GFP_KERNEL); + if (!dchid) + return -ENOMEM; + + dchid->dev = dev; + + /* + * First make sure all the GPIOs are available, in case we need to defer. + * This is necessary because MTP will request them by name later, and by then + * it's too late to defer the probe. + */ + + for_each_child_of_node(dev->of_node, child) { + for_each_property_of_node(child, prop) { + size_t len = strlen(prop->name); + struct gpio_desc *gpio; + + if (len < 12 || strncmp("apple,", prop->name, 6) || + strcmp("-gpios", prop->name + len - 6)) + continue; + + gpio = fwnode_gpiod_get_index(&child->fwnode, prop->name, 0, GPIOD_ASIS, + prop->name); + if (IS_ERR_OR_NULL(gpio)) { + if (PTR_ERR(gpio) == -EPROBE_DEFER) { + of_node_put(child); + return -EPROBE_DEFER; + } + } else { + gpiod_put(gpio); + } + } + } + + /* + * Make sure we also have the MTP coprocessor available, and + * defer probe if the helper hasn't probed yet. 
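+	 * (The helper is presumably the rtkit-helper platform driver added
+	 * under drivers/soc/apple/ in this series.)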
+ */ + helper = of_parse_phandle(dev->of_node, "apple,helper-cpu", 0); + if (!helper) { + dev_err(dev, "Missing apple,helper-cpu property"); + return -EINVAL; + } + + helper_pdev = of_find_device_by_node(helper); + of_node_put(helper); + if (!helper_pdev) { + dev_err(dev, "Failed to find helper device"); + return -EINVAL; + } + + dchid->helper_link = device_link_add(dev, &helper_pdev->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + put_device(&helper_pdev->dev); + if (!dchid->helper_link) { + dev_err(dev, "Failed to link to helper device"); + return -EINVAL; + } + + if (dchid->helper_link->supplier->links.status != DL_DEV_DRIVER_BOUND) + return -EPROBE_DEFER; + + /* Now it is safe to begin initializing */ + dchid->dc = dockchannel_init(pdev); + if (IS_ERR_OR_NULL(dchid->dc)) { + return PTR_ERR(dchid->dc); + } + dchid->new_iface_wq = alloc_workqueue("dchid-new", WQ_MEM_RECLAIM, 0); + if (!dchid->new_iface_wq) + return -ENOMEM; + + dchid->comm = dchid_get_interface(dchid, IFACE_COMM, "comm"); + if (!dchid->comm) { + dev_err(dchid->dev, "Failed to initialize comm interface"); + return -EIO; + } + + dev_info(dchid->dev, "Initialized, awaiting packets\n"); + dockchannel_await(dchid->dc, dchid_handle_packet, dchid, sizeof(struct dchid_hdr)); + + return 0; +} + +static void dockchannel_hid_remove(struct platform_device *pdev) +{ + BUG_ON(1); +} + +static const struct of_device_id dockchannel_hid_of_match[] = { + { .compatible = "apple,dockchannel-hid" }, + {}, +}; +MODULE_DEVICE_TABLE(of, dockchannel_hid_of_match); +MODULE_FIRMWARE("apple/tpmtfw-*.bin"); + +static struct platform_driver dockchannel_hid_driver = { + .driver = { + .name = "dockchannel-hid", + .of_match_table = dockchannel_hid_of_match, + }, + .probe = dockchannel_hid_probe, + .remove = dockchannel_hid_remove, +}; +module_platform_driver(dockchannel_hid_driver); + +MODULE_DESCRIPTION("Apple DockChannel HID transport driver"); +MODULE_AUTHOR("Hector Martin "); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 57da4f86a9fa..a95ef2ddeafe 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -30,7 +30,7 @@ #include "hid-ids.h" #define APPLE_RDESC_JIS BIT(0) -/* BIT(1) reserved, was: APPLE_IGNORE_MOUSE */ +#define APPLE_IGNORE_MOUSE BIT(1) #define APPLE_HAS_FN BIT(2) /* BIT(3) reserved, was: APPLE_HIDDEV */ #define APPLE_ISO_TILDE_QUIRK BIT(4) @@ -518,6 +518,16 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, table = macbookair_fn_keys; else if (hid->product < 0x21d || hid->product >= 0x300) table = powerbook_fn_keys; + else if (hid->bus == BUS_HOST || hid->bus == BUS_SPI) + switch (hid->product) { + case SPI_DEVICE_ID_APPLE_MACBOOK_PRO13_2020: + case HOST_DEVICE_ID_APPLE_MACBOOK_PRO13_2022: + table = macbookpro_dedicated_esc_fn_keys; + break; + default: + table = magic_keyboard_2021_and_2024_fn_keys; + break; + } else table = apple_fn_keys; } @@ -938,6 +948,13 @@ static int apple_probe(struct hid_device *hdev, struct apple_sc *asc; int ret; + if ((id->bus == BUS_SPI || id->bus == BUS_HOST) && id->vendor == SPI_VENDOR_ID_APPLE && + hdev->type != HID_TYPE_SPI_KEYBOARD) + return -ENODEV; + + if (quirks & APPLE_IGNORE_MOUSE && hdev->type == HID_TYPE_USBMOUSE) + return -ENODEV; + asc = devm_kzalloc(&hdev->dev, sizeof(*asc), GFP_KERNEL); if (asc == NULL) { hid_err(hdev, "can't alloc apple descriptor\n"); @@ -1160,27 +1177,31 @@ static const struct hid_device_id apple_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 
USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K), - .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK | + APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK | - APPLE_DISABLE_FKEYS }, + APPLE_DISABLE_FKEYS | APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK | - APPLE_DISABLE_FKEYS }, + APPLE_DISABLE_FKEYS | APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK | - APPLE_DISABLE_FKEYS }, + APPLE_DISABLE_FKEYS | APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK | - APPLE_DISABLE_FKEYS }, + APPLE_DISABLE_FKEYS | APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_DISABLE_FKEYS }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_DISABLE_FKEYS | + APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_DISABLE_FKEYS }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_DISABLE_FKEYS | + APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_IGNORE_MOUSE }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_DISABLE_FKEYS }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_DISABLE_FKEYS | + APPLE_IGNORE_MOUSE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), @@ -1216,6 +1237,10 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2024), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + { HID_SPI_DEVICE(SPI_VENDOR_ID_APPLE, HID_ANY_ID), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + { HID_DEVICE(BUS_HOST, HID_GROUP_ANY, HOST_VENDOR_ID_APPLE, HID_ANY_ID), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT), .driver_data = APPLE_MAGIC_BACKLIGHT }, diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index a5b3a8ca2fcb..b353fd819989 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -468,7 +468,10 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) case HID_GLOBAL_ITEM_TAG_REPORT_SIZE: parser->global.report_size = item_udata(item); - if (parser->global.report_size > 256) { + /* Arbitrary maximum. 
Some Apple devices have 16384 here. + * This * HID_MAX_USAGES must fit in a signed integer. + */ + if (parser->global.report_size > 16384) { hid_err(parser->device, "invalid report_size %d\n", parser->global.report_size); return -1; @@ -2316,6 +2319,12 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask) case BUS_I2C: bus = "I2C"; break; + case BUS_SPI: + bus = "SPI"; + break; + case BUS_HOST: + bus = "HOST"; + break; case BUS_SDW: bus = "SOUNDWIRE"; break; diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 9c2bf584d9f6..adad71cc8bf0 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -93,6 +93,8 @@ #define USB_VENDOR_ID_APPLE 0x05ac #define BT_VENDOR_ID_APPLE 0x004c +#define SPI_VENDOR_ID_APPLE 0x05ac +#define HOST_VENDOR_ID_APPLE 0x05ac #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d #define USB_DEVICE_ID_APPLE_MAGICMOUSE2 0x0269 @@ -197,6 +199,14 @@ #define USB_DEVICE_ID_APPLE_IRCONTROL5 0x8243 #define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102 #define USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY 0x8302 +#define SPI_DEVICE_ID_APPLE_MACBOOK_AIR_2020 0x0281 +#define SPI_DEVICE_ID_APPLE_MACBOOK_PRO13_2020 0x0341 +#define SPI_DEVICE_ID_APPLE_MACBOOK_PRO14_2021 0x0342 +#define SPI_DEVICE_ID_APPLE_MACBOOK_PRO16_2021 0x0343 +#define HOST_DEVICE_ID_APPLE_MACBOOK_AIR13_2022 0x0351 +#define HOST_DEVICE_ID_APPLE_MACBOOK_PRO14_2023 0x0352 +#define HOST_DEVICE_ID_APPLE_MACBOOK_PRO16_2023 0x0353 +#define HOST_DEVICE_ID_APPLE_MACBOOK_PRO13_2022 0x0354 #define USB_VENDOR_ID_ASETEK 0x2433 #define USB_DEVICE_ID_ASETEK_INVICTA 0xf300 diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 7d4a25c6de0e..4bf49d7cad0e 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -60,8 +60,14 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define MOUSE_REPORT_ID 0x29 #define MOUSE2_REPORT_ID 0x12 #define DOUBLE_REPORT_ID 0xf7 +#define SPI_REPORT_ID 0x02 +#define SPI_RESET_REPORT_ID 0x60 +#define MTP_REPORT_ID 0x75 +#define SENSOR_DIMENSIONS_REPORT_ID 0xd9 #define USB_BATTERY_TIMEOUT_SEC 60 +#define MAX_CONTACTS 16 + /* These definitions are not precise, but they're close enough. 
(Bits * 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem * to be some kind of bit mask -- 0x20 may be a near-field reading, @@ -112,30 +118,156 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define TRACKPAD2_RES_Y \ ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100)) +#define J140K_TP_DIMENSION_X (float)12100 +#define J140K_TP_MIN_X -5318 +#define J140K_TP_MAX_X 5787 +#define J140K_TP_RES_X \ + ((J140K_TP_MAX_X - J140K_TP_MIN_X) / (J140K_TP_DIMENSION_X / 100)) +#define J140K_TP_DIMENSION_Y (float)8200 +#define J140K_TP_MIN_Y -157 +#define J140K_TP_MAX_Y 7102 +#define J140K_TP_RES_Y \ + ((J140K_TP_MAX_Y - J140K_TP_MIN_Y) / (J140K_TP_DIMENSION_Y / 100)) + +#define J132_TP_DIMENSION_X (float)13500 +#define J132_TP_MIN_X -6243 +#define J132_TP_MAX_X 6749 +#define J132_TP_RES_X \ + ((J132_TP_MAX_X - J132_TP_MIN_X) / (J132_TP_DIMENSION_X / 100)) +#define J132_TP_DIMENSION_Y (float)8400 +#define J132_TP_MIN_Y -170 +#define J132_TP_MAX_Y 7685 +#define J132_TP_RES_Y \ + ((J132_TP_MAX_Y - J132_TP_MIN_Y) / (J132_TP_DIMENSION_Y / 100)) + +#define J680_TP_DIMENSION_X (float)16000 +#define J680_TP_MIN_X -7456 +#define J680_TP_MAX_X 7976 +#define J680_TP_RES_X \ + ((J680_TP_MAX_X - J680_TP_MIN_X) / (J680_TP_DIMENSION_X / 100)) +#define J680_TP_DIMENSION_Y (float)10000 +#define J680_TP_MIN_Y -163 +#define J680_TP_MAX_Y 9283 +#define J680_TP_RES_Y \ + ((J680_TP_MAX_Y - J680_TP_MIN_Y) / (J680_TP_DIMENSION_Y / 100)) + +#define J680_ALT_TP_DIMENSION_X (float)16000 +#define J680_ALT_TP_MIN_X -7456 +#define J680_ALT_TP_MAX_X 7976 +#define J680_ALT_TP_RES_X \ + ((J680_ALT_TP_MAX_X - J680_ALT_TP_MIN_X) / (J680_ALT_TP_DIMENSION_X / 100)) +#define J680_ALT_TP_DIMENSION_Y (float)10000 +#define J680_ALT_TP_MIN_Y -163 +#define J680_ALT_TP_MAX_Y 9283 +#define J680_ALT_TP_RES_Y \ + ((J680_ALT_TP_MAX_Y - J680_ALT_TP_MIN_Y) / (J680_ALT_TP_DIMENSION_Y / 100)) + +#define J213_TP_DIMENSION_X (float)13500 +#define J213_TP_MIN_X -6243 +#define J213_TP_MAX_X 6749 +#define J213_TP_RES_X \ + ((J213_TP_MAX_X - J213_TP_MIN_X) / (J213_TP_DIMENSION_X / 100)) +#define J213_TP_DIMENSION_Y (float)8400 +#define J213_TP_MIN_Y -170 +#define J213_TP_MAX_Y 7685 +#define J213_TP_RES_Y \ + ((J213_TP_MAX_Y - J213_TP_MIN_Y) / (J213_TP_DIMENSION_Y / 100)) + +#define J214K_TP_DIMENSION_X (float)13200 +#define J214K_TP_MIN_X -6046 +#define J214K_TP_MAX_X 6536 +#define J214K_TP_RES_X \ + ((J214K_TP_MAX_X - J214K_TP_MIN_X) / (J214K_TP_DIMENSION_X / 100)) +#define J214K_TP_DIMENSION_Y (float)8200 +#define J214K_TP_MIN_Y -164 +#define J214K_TP_MAX_Y 7439 +#define J214K_TP_RES_Y \ + ((J214K_TP_MAX_Y - J214K_TP_MIN_Y) / (J214K_TP_DIMENSION_Y / 100)) + +#define J223_TP_DIMENSION_X (float)13200 +#define J223_TP_MIN_X -6046 +#define J223_TP_MAX_X 6536 +#define J223_TP_RES_X \ + ((J223_TP_MAX_X - J223_TP_MIN_X) / (J223_TP_DIMENSION_X / 100)) +#define J223_TP_DIMENSION_Y (float)8200 +#define J223_TP_MIN_Y -164 +#define J223_TP_MAX_Y 7439 +#define J223_TP_RES_Y \ + ((J223_TP_MAX_Y - J223_TP_MIN_Y) / (J223_TP_DIMENSION_Y / 100)) + +#define J230K_TP_DIMENSION_X (float)12100 +#define J230K_TP_MIN_X -5318 +#define J230K_TP_MAX_X 5787 +#define J230K_TP_RES_X \ + ((J230K_TP_MAX_X - J230K_TP_MIN_X) / (J230K_TP_DIMENSION_X / 100)) +#define J230K_TP_DIMENSION_Y (float)8200 +#define J230K_TP_MIN_Y -157 +#define J230K_TP_MAX_Y 7102 +#define J230K_TP_RES_Y \ + ((J230K_TP_MAX_Y - J230K_TP_MIN_Y) / (J230K_TP_DIMENSION_Y / 100)) + +#define J152F_TP_DIMENSION_X (float)16000 +#define J152F_TP_MIN_X 
-7456 +#define J152F_TP_MAX_X 7976 +#define J152F_TP_RES_X \ + ((J152F_TP_MAX_X - J152F_TP_MIN_X) / (J152F_TP_DIMENSION_X / 100)) +#define J152F_TP_DIMENSION_Y (float)10000 +#define J152F_TP_MIN_Y -163 +#define J152F_TP_MAX_Y 9283 +#define J152F_TP_RES_Y \ + ((J152F_TP_MAX_Y - J152F_TP_MIN_Y) / (J152F_TP_DIMENSION_Y / 100)) + +/* These are fallback values, since the real values will be queried from the device. */ +#define J314_TP_DIMENSION_X (float)13000 +#define J314_TP_MIN_X -5900 +#define J314_TP_MAX_X 6500 +#define J314_TP_RES_X \ + ((J314_TP_MAX_X - J314_TP_MIN_X) / (J314_TP_DIMENSION_X / 100)) +#define J314_TP_DIMENSION_Y (float)8100 +#define J314_TP_MIN_Y -200 +#define J314_TP_MAX_Y 7400 +#define J314_TP_RES_Y \ + ((J314_TP_MAX_Y - J314_TP_MIN_Y) / (J314_TP_DIMENSION_Y / 100)) + +#define T2_TOUCHPAD_ENTRY(model) \ + { USB_DEVICE_ID_APPLE_WELLSPRINGT2_##model, model##_TP_MIN_X, model##_TP_MIN_Y, \ +model##_TP_MAX_X, model##_TP_MAX_Y, model##_TP_RES_X, model##_TP_RES_Y } + +#define INTERNAL_TP_MAX_FINGER_ORIENTATION 16384 + +struct magicmouse_input_ops { + int (*raw_event)(struct hid_device *hdev, + struct hid_report *report, u8 *data, int size); + int (*setup_input)(struct input_dev *input, struct hid_device *hdev); +}; + /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. * @quirks: Currently unused. + * @query_dimensions: Whether to query and update dimensions on first open * @ntouches: Number of touches in most recent touch report. * @scroll_accel: Number of consecutive scroll motions. * @scroll_jiffies: Time of last scroll motion. + * @pos: multi touch position data of the last report. * @touches: Most recent data for a touch, indexed by tracking ID. * @tracking_ids: Mapping of current touch input data to @touches. * @hdev: Pointer to the underlying HID device. * @work: Workqueue to handle initialization retry for quirky devices. * @battery_timer: Timer for obtaining battery level information. + * @input_ops: Input ops based on device type. */ struct magicmouse_sc { struct input_dev *input; unsigned long quirks; + bool query_dimensions; int ntouches; int scroll_accel; unsigned long scroll_jiffies; + struct input_mt_pos pos[MAX_CONTACTS]; struct { - short x; - short y; short scroll_x; short scroll_y; short scroll_x_hr; @@ -143,14 +275,112 @@ struct magicmouse_sc { u8 size; bool scroll_x_active; bool scroll_y_active; - } touches[16]; - int tracking_ids[16]; + } touches[MAX_CONTACTS]; + int tracking_ids[MAX_CONTACTS]; struct hid_device *hdev; struct delayed_work work; struct timer_list battery_timer; + struct magicmouse_input_ops input_ops; }; +static int magicmouse_enable_multitouch(struct hid_device *hdev); + +static inline int le16_to_int(__le16 x) +{ + return (signed short)le16_to_cpu(x); +} + +static int magicmouse_open(struct input_dev *dev) +{ + struct hid_device *hdev = input_get_drvdata(dev); + struct magicmouse_sc *msc = hid_get_drvdata(hdev); + int ret; + + ret = hid_hw_open(hdev); + if (ret) + return ret; + + /* + * Some devices repond with 'invalid report id' when feature + * report switching it into multitouch mode is sent to it. + * + * This results in -EIO from the _raw low-level transport callback, + * but there seems to be no other way of switching the mode. + * Thus the super-ugly hacky success check below. + * + * MTP devices do not need this. 
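+	 *
+	 * In short, the enable call below tolerates -EIO (the device may
+	 * have switched modes despite NAKing the feature report) and treats
+	 * any other negative code as fatal; Magic Mouse 2 variants
+	 * additionally get a delayed retry through msc->work.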
+ */ + if (hdev->bus != BUS_HOST) { + ret = magicmouse_enable_multitouch(hdev); + if (ret != -EIO && ret < 0) { + hid_err(hdev, "unable to request touch data (%d)\n", ret); + return ret; + } + if (ret == -EIO && (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || + hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC)) { + schedule_delayed_work(&msc->work, msecs_to_jiffies(500)); + } + } + + /* + * For Apple Silicon trackpads, we want to query the dimensions on + * device open. This is because doing so requires the firmware, but + * we don't want to force a firmware load until the device is opened + * for the first time. So do that here and update the input properties + * just in time before userspace queries them. + */ + if (msc->query_dimensions) { + struct input_dev *input = msc->input; + u8 buf[32]; + struct { + __le32 width; + __le32 height; + __le16 min_x; + __le16 min_y; + __le16 max_x; + __le16 max_y; + } dim; + uint32_t x_span, y_span; + + ret = hid_hw_raw_request(hdev, SENSOR_DIMENSIONS_REPORT_ID, buf, sizeof(buf), HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + if (ret < (int)(1 + sizeof(dim))) { + hid_err(hdev, "unable to request dimensions (%d)\n", ret); + return ret; + } + + memcpy(&dim, buf + 1, sizeof(dim)); + + /* finger position */ + input_set_abs_params(input, ABS_MT_POSITION_X, + le16_to_int(dim.min_x), le16_to_int(dim.max_x), 0, 0); + /* Y axis is inverted */ + input_set_abs_params(input, ABS_MT_POSITION_Y, + -le16_to_int(dim.max_y), -le16_to_int(dim.min_y), 0, 0); + x_span = le16_to_int(dim.max_x) - le16_to_int(dim.min_x); + y_span = le16_to_int(dim.max_y) - le16_to_int(dim.min_y); + + /* X/Y resolution */ + input_abs_set_res(input, ABS_MT_POSITION_X, 100 * x_span / le32_to_cpu(dim.width) ); + input_abs_set_res(input, ABS_MT_POSITION_Y, 100 * y_span / le32_to_cpu(dim.height) ); + + /* copy info, as input_mt_init_slots() does */ + dev->absinfo[ABS_X] = dev->absinfo[ABS_MT_POSITION_X]; + dev->absinfo[ABS_Y] = dev->absinfo[ABS_MT_POSITION_Y]; + + msc->query_dimensions = false; + } + + return 0; +} + +static void magicmouse_close(struct input_dev *dev) +{ + struct hid_device *hdev = input_get_drvdata(dev); + + hid_hw_close(hdev); +} + static int magicmouse_firm_touch(struct magicmouse_sc *msc) { int touch = -1; @@ -192,7 +422,7 @@ static void magicmouse_emit_buttons(struct magicmouse_sc *msc, int state) } else if (last_state != 0) { state = last_state; } else if ((id = magicmouse_firm_touch(msc)) >= 0) { - int x = msc->touches[id].x; + int x = msc->pos[id].x; if (x < middle_button_start) state = 1; else if (x > middle_button_stop) @@ -256,8 +486,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda /* Store tracking ID and other fields. 
*/ msc->tracking_ids[raw_id] = id; - msc->touches[id].x = x; - msc->touches[id].y = y; + msc->pos[id].x = x; + msc->pos[id].y = y; msc->touches[id].size = size; /* If requested, emulate a scroll wheel by detecting small @@ -387,6 +617,14 @@ static int magicmouse_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); + + return msc->input_ops.raw_event(hdev, report, data, size); +} + +static int magicmouse_raw_event_usb(struct hid_device *hdev, + struct hid_report *report, u8 *data, int size) +{ + struct magicmouse_sc *msc = hid_get_drvdata(hdev); struct input_dev *input = msc->input; int x = 0, y = 0, ii, clicks = 0, npoints; @@ -518,6 +756,188 @@ static int magicmouse_raw_event(struct hid_device *hdev, return 1; } +/** + * struct tp_finger - single trackpad finger structure, le16-aligned + * + * @unknown1: unknown + * @unknown2: unknown + * @abs_x: absolute x coordinate + * @abs_y: absolute y coordinate + * @rel_x: relative x coordinate + * @rel_y: relative y coordinate + * @tool_major: tool area, major axis + * @tool_minor: tool area, minor axis + * @orientation: 16384 when point, else 15 bit angle + * @touch_major: touch area, major axis + * @touch_minor: touch area, minor axis + * @unused: zeros + * @pressure: pressure on forcetouch touchpad + * @multi: one finger: varies, more fingers: constant + */ +struct tp_finger { + __le16 unknown1; + __le16 unknown2; + __le16 abs_x; + __le16 abs_y; + __le16 rel_x; + __le16 rel_y; + __le16 tool_major; + __le16 tool_minor; + __le16 orientation; + __le16 touch_major; + __le16 touch_minor; + __le16 unused[2]; + __le16 pressure; + __le16 multi; +} __attribute__((packed, aligned(2))); + +/** + * vendor trackpad report + * + * @num_fingers: the number of fingers being reported in @fingers + * @buttons: same as HID buttons + */ +struct tp_header { + // HID vendor part, up to 1751 bytes + u8 unknown[22]; + u8 num_fingers; + u8 buttons; + u8 unknown3[14]; +}; + +/** + * standard HID mouse report + * + * @report_id: reportid + * @buttons: HID Usage Buttons 3 1-bit reports + */ +struct tp_mouse_report { + // HID mouse report + u8 report_id; + u8 buttons; + u8 rel_x; + u8 rel_y; + u8 padding[4]; +}; + +static void report_finger_data(struct input_dev *input, int slot, + const struct input_mt_pos *pos, + const struct tp_finger *f) +{ + input_mt_slot(input, slot); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + + input_report_abs(input, ABS_MT_TOUCH_MAJOR, + le16_to_int(f->touch_major) << 1); + input_report_abs(input, ABS_MT_TOUCH_MINOR, + le16_to_int(f->touch_minor) << 1); + input_report_abs(input, ABS_MT_WIDTH_MAJOR, + le16_to_int(f->tool_major) << 1); + input_report_abs(input, ABS_MT_WIDTH_MINOR, + le16_to_int(f->tool_minor) << 1); + input_report_abs(input, ABS_MT_ORIENTATION, + INTERNAL_TP_MAX_FINGER_ORIENTATION - le16_to_int(f->orientation)); + input_report_abs(input, ABS_MT_PRESSURE, le16_to_int(f->pressure)); + input_report_abs(input, ABS_MT_POSITION_X, pos->x); + input_report_abs(input, ABS_MT_POSITION_Y, pos->y); +} + +static int magicmouse_raw_event_mtp(struct hid_device *hdev, + struct hid_report *report, u8 *data, int size) +{ + struct magicmouse_sc *msc = hid_get_drvdata(hdev); + struct input_dev *input = msc->input; + struct tp_header *tp_hdr; + struct tp_finger *f; + int i, n; + u32 npoints; + const size_t hdr_sz = sizeof(struct tp_header); + const size_t touch_sz = sizeof(struct tp_finger); + u8 map_contacs[MAX_CONTACTS]; + + // hid_warn(hdev, "%s\n", 
__func__); + // print_hex_dump_debug("appleft ev: ", DUMP_PREFIX_OFFSET, 16, 1, data, + // size, false); + + /* Expect 46 bytes of prefix, and N * 30 bytes of touch data. */ + if (size < hdr_sz || ((size - hdr_sz) % touch_sz) != 0) + return 0; + + tp_hdr = (struct tp_header *)data; + + npoints = (size - hdr_sz) / touch_sz; + if (npoints < tp_hdr->num_fingers || npoints > MAX_CONTACTS) { + hid_warn(hdev, + "unexpected number of touches (%u) for " + "report\n", + npoints); + return 0; + } + + n = 0; + for (i = 0; i < tp_hdr->num_fingers; i++) { + f = (struct tp_finger *)(data + hdr_sz + i * touch_sz); + if (le16_to_int(f->touch_major) == 0) + continue; + + hid_dbg(hdev, "ev x:%04x y:%04x\n", le16_to_int(f->abs_x), + le16_to_int(f->abs_y)); + msc->pos[n].x = le16_to_int(f->abs_x); + msc->pos[n].y = -le16_to_int(f->abs_y); + map_contacs[n] = i; + n++; + } + + input_mt_assign_slots(input, msc->tracking_ids, msc->pos, n, 0); + + for (i = 0; i < n; i++) { + int idx = map_contacs[i]; + f = (struct tp_finger *)(data + hdr_sz + idx * touch_sz); + report_finger_data(input, msc->tracking_ids[i], &msc->pos[i], f); + } + + input_mt_sync_frame(input); + input_report_key(input, BTN_MOUSE, tp_hdr->buttons & 1); + + input_sync(input); + return 1; +} + +static int magicmouse_raw_event_spi(struct hid_device *hdev, + struct hid_report *report, u8 *data, int size) +{ + struct magicmouse_sc *msc = hid_get_drvdata(hdev); + const size_t hdr_sz = sizeof(struct tp_mouse_report); + + if (!size) + return 0; + + if (data[0] == SPI_RESET_REPORT_ID) { + hid_info(hdev, "Touch controller was reset, re-enabling touch mode\n"); + schedule_delayed_work(&msc->work, msecs_to_jiffies(10)); + return 1; + } + + if (data[0] != SPI_REPORT_ID || size < hdr_sz) + return 0; + + return magicmouse_raw_event_mtp(hdev, report, data + hdr_sz, size - hdr_sz); +} + +static int magicmouse_raw_event_t2(struct hid_device *hdev, + struct hid_report *report, u8 *data, int size) +{ + const size_t hdr_sz = sizeof(struct tp_mouse_report); + + if (!size) + return 0; + + if (data[0] != TRACKPAD2_USB_REPORT_ID || size < hdr_sz) + return 0; + + return magicmouse_raw_event_mtp(hdev, report, data + hdr_sz, size - hdr_sz); +} + static int magicmouse_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { @@ -536,7 +956,17 @@ static int magicmouse_event(struct hid_device *hdev, struct hid_field *field, return 0; } -static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hdev) + +static int magicmouse_setup_input(struct input_dev *input, + struct hid_device *hdev) +{ + struct magicmouse_sc *msc = hid_get_drvdata(hdev); + + return msc->input_ops.setup_input(input, hdev); +} + +static int magicmouse_setup_input_usb(struct input_dev *input, + struct hid_device *hdev) { int error; int mt_flags = 0; @@ -615,7 +1045,7 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd __set_bit(EV_ABS, input->evbit); - error = input_mt_init_slots(input, 16, mt_flags); + error = input_mt_init_slots(input, MAX_CONTACTS, mt_flags); if (error) return error; input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 255 << 2, @@ -695,6 +1125,171 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd */ __clear_bit(EV_REP, input->evbit); + /* + * This isn't strictly speaking needed for USB, but enabling MT on + * device open is probably more robust than only doing it once on probe + * even if USB devices are not known to suffer from the SPI reset issue. 
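+	 *
+	 * (The SPI transport re-sends the enable from delayed work whenever
+	 * a SPI_RESET_REPORT_ID packet arrives; re-sending it from open()
+	 * as well keeps USB and SPI on a single recovery path.)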
+ */ + input->open = magicmouse_open; + input->close = magicmouse_close; + return 0; +} + +struct magicmouse_t2_properties { + u32 id; + int min_x; + int min_y; + int max_x; + int max_y; + int res_x; + int res_y; +}; + +static const struct magicmouse_t2_properties magicmouse_t2_configs[] = { + T2_TOUCHPAD_ENTRY(J140K), + T2_TOUCHPAD_ENTRY(J132), + T2_TOUCHPAD_ENTRY(J680), + T2_TOUCHPAD_ENTRY(J680_ALT), + T2_TOUCHPAD_ENTRY(J213), + T2_TOUCHPAD_ENTRY(J214K), + T2_TOUCHPAD_ENTRY(J223), + T2_TOUCHPAD_ENTRY(J230K), + T2_TOUCHPAD_ENTRY(J152F), +}; + +static int magicmouse_setup_input_int_tpd(struct input_dev *input, + struct hid_device *hdev, int min_x, int min_y, + int max_x, int max_y, int res_x, int res_y, + bool query_dimensions) +{ + int error; + int mt_flags = 0; + struct magicmouse_sc *msc = hid_get_drvdata(hdev); + + __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); + __clear_bit(BTN_0, input->keybit); + __clear_bit(BTN_RIGHT, input->keybit); + __clear_bit(BTN_MIDDLE, input->keybit); + __clear_bit(EV_REL, input->evbit); + __clear_bit(REL_X, input->relbit); + __clear_bit(REL_Y, input->relbit); + + mt_flags = INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | INPUT_MT_TRACK; + + /* finger touch area */ + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 5000, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 5000, 0, 0); + + /* finger approach area */ + input_set_abs_params(input, ABS_MT_WIDTH_MAJOR, 0, 5000, 0, 0); + input_set_abs_params(input, ABS_MT_WIDTH_MINOR, 0, 5000, 0, 0); + + /* Note: Touch Y position from the device is inverted relative + * to how pointer motion is reported (and relative to how USB + * HID recommends the coordinates work). This driver keeps + * the origin at the same position, and just uses the additive + * inverse of the reported Y. + */ + + input_set_abs_params(input, ABS_MT_PRESSURE, 0, 6000, 0, 0); + + /* + * This makes libinput recognize this as a PressurePad and + * stop trying to use pressure for touch size. Pressure unit + * seems to be ~grams on these touchpads. + */ + input_abs_set_res(input, ABS_MT_PRESSURE, 1); + + /* finger orientation */ + input_set_abs_params(input, ABS_MT_ORIENTATION, -INTERNAL_TP_MAX_FINGER_ORIENTATION, + INTERNAL_TP_MAX_FINGER_ORIENTATION, 0, 0); + + /* finger position */ + input_set_abs_params(input, ABS_MT_POSITION_X, min_x, max_x, 0, 0); + /* Y axis is inverted */ + input_set_abs_params(input, ABS_MT_POSITION_Y, -max_y, -min_y, 0, 0); + + /* X/Y resolution */ + input_abs_set_res(input, ABS_MT_POSITION_X, res_x); + input_abs_set_res(input, ABS_MT_POSITION_Y, res_y); + + input_set_events_per_packet(input, 60); + + /* touchpad button */ + input_set_capability(input, EV_KEY, BTN_MOUSE); + + /* + * hid-input may mark device as using autorepeat, but the trackpad does + * not actually want it. + */ + __clear_bit(EV_REP, input->evbit); + + error = input_mt_init_slots(input, MAX_CONTACTS, mt_flags); + if (error) + return error; + + /* + * Override the default input->open function to send the MT + * enable every time the device is opened. This ensures it works + * even if we missed a reset event due to the device being closed. + * input->close is overridden for symmetry. + * + * This also takes care of the dimensions query. 
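+	 *
+	 * Worked example of the units: with the J314 fallback values the X
+	 * span is 6500 - (-5900) = 12400 device units across 130 mm, so
+	 * J314_TP_RES_X = 12400 / 130 = ~95 units/mm. The dimensions query
+	 * in magicmouse_open() computes the same ratio
+	 * (100 * span / width) from device-reported values.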
+	 */
+	input->open = magicmouse_open;
+	input->close = magicmouse_close;
+	msc->query_dimensions = query_dimensions;
+
+	return 0;
+}
+
+static int magicmouse_setup_input_mtp(struct input_dev *input,
+				      struct hid_device *hdev)
+{
+	return magicmouse_setup_input_int_tpd(input, hdev, J314_TP_MIN_X,
+					      J314_TP_MIN_Y, J314_TP_MAX_X,
+					      J314_TP_MAX_Y, J314_TP_RES_X,
+					      J314_TP_RES_Y, true);
+}
+
+static int magicmouse_setup_input_spi(struct input_dev *input,
+				      struct hid_device *hdev)
+{
+	return magicmouse_setup_input_int_tpd(input, hdev, J314_TP_MIN_X,
+					      J314_TP_MIN_Y, J314_TP_MAX_X,
+					      J314_TP_MAX_Y, J314_TP_RES_X,
+					      J314_TP_RES_Y, true);
+}
+
+static int magicmouse_setup_input_t2(struct input_dev *input,
+				     struct hid_device *hdev)
+{
+	int min_x, min_y, max_x, max_y, res_x, res_y;
+	bool found = false;
+
+	for (size_t i = 0; i < ARRAY_SIZE(magicmouse_t2_configs); i++) {
+		if (magicmouse_t2_configs[i].id == hdev->product) {
+			min_x = magicmouse_t2_configs[i].min_x;
+			min_y = magicmouse_t2_configs[i].min_y;
+			max_x = magicmouse_t2_configs[i].max_x;
+			max_y = magicmouse_t2_configs[i].max_y;
+			res_x = magicmouse_t2_configs[i].res_x;
+			res_y = magicmouse_t2_configs[i].res_y;
+			found = true;
+			break;
+		}
+	}
+
+	/*
+	 * probe() only binds products present in the table above, but do not
+	 * risk using uninitialized ranges if that ever changes.
+	 */
+	if (!found)
+		return -ENODEV;
+
+	int ret = magicmouse_setup_input_int_tpd(input, hdev, min_x, min_y,
+						 max_x, max_y, res_x, res_y, false);
+	if (ret)
+		return ret;
+
 	return 0;
 }
@@ -748,6 +1343,10 @@ static int magicmouse_enable_multitouch(struct hid_device *hdev)
 	int feature_size;
 
 	switch (hdev->product) {
+	case SPI_DEVICE_ID_APPLE_MACBOOK_AIR_2020:
+	case SPI_DEVICE_ID_APPLE_MACBOOK_PRO13_2020:
+	case SPI_DEVICE_ID_APPLE_MACBOOK_PRO14_2021:
+	case SPI_DEVICE_ID_APPLE_MACBOOK_PRO16_2021:
 	case USB_DEVICE_ID_APPLE_MAGICTRACKPAD2:
 	case USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC:
 		switch (hdev->vendor) {
@@ -755,11 +1354,23 @@
 			feature_size = sizeof(feature_mt_trackpad2_bt);
 			feature = feature_mt_trackpad2_bt;
 			break;
-		default: /* USB_VENDOR_ID_APPLE */
+		default: /* USB_VENDOR_ID_APPLE || SPI_VENDOR_ID_APPLE */
 			feature_size = sizeof(feature_mt_trackpad2_usb);
 			feature = feature_mt_trackpad2_usb;
 		}
 		break;
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F:
+		feature_size = sizeof(feature_mt_trackpad2_usb);
+		feature = feature_mt_trackpad2_usb;
+		break;
 	case USB_DEVICE_ID_APPLE_MAGICMOUSE2:
 	case USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC:
 		feature_size = sizeof(feature_mt_mouse2);
@@ -852,12 +1463,62 @@ static int magicmouse_probe(struct hid_device *hdev,
 	struct hid_report *report;
 	int ret;
 
+	if ((id->bus == BUS_SPI || id->bus == BUS_HOST) && id->vendor == SPI_VENDOR_ID_APPLE &&
+	    hdev->type != HID_TYPE_SPI_MOUSE)
+		return -ENODEV;
+
+	switch (id->product) {
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K:
+	case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F:
+		if
(hdev->type != HID_TYPE_USBMOUSE) + return -ENODEV; + break; + } + msc = devm_kzalloc(&hdev->dev, sizeof(*msc), GFP_KERNEL); if (msc == NULL) { hid_err(hdev, "can't alloc magicmouse descriptor\n"); return -ENOMEM; } + // internal trackpad use a data format use input ops to avoid + // conflicts with the report ID. + switch (id->bus) { + case BUS_HOST: + msc->input_ops.raw_event = magicmouse_raw_event_mtp; + msc->input_ops.setup_input = magicmouse_setup_input_mtp; + break; + case BUS_SPI: + msc->input_ops.raw_event = magicmouse_raw_event_spi; + msc->input_ops.setup_input = magicmouse_setup_input_spi; + break; + default: + switch (id->product) { + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F: + msc->input_ops.raw_event = magicmouse_raw_event_t2; + msc->input_ops.setup_input = magicmouse_setup_input_t2; + break; + default: + msc->input_ops.raw_event = magicmouse_raw_event_usb; + msc->input_ops.setup_input = magicmouse_setup_input_usb; + } + } + msc->scroll_accel = SCROLL_ACCEL_DEFAULT; msc->hdev = hdev; INIT_DEFERRABLE_WORK(&msc->work, magicmouse_enable_mt_work); @@ -916,11 +1577,32 @@ static int magicmouse_probe(struct hid_device *hdev, TRACKPAD2_USB_REPORT_ID, 0); } break; - default: /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ - report = hid_register_report(hdev, HID_INPUT_REPORT, - TRACKPAD_REPORT_ID, 0); + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K: + case USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F: report = hid_register_report(hdev, HID_INPUT_REPORT, - DOUBLE_REPORT_ID, 0); + TRACKPAD2_USB_REPORT_ID, 0); + break; + default: + switch (id->bus) { + case BUS_HOST: + report = hid_register_report(hdev, HID_INPUT_REPORT, MTP_REPORT_ID, 0); + break; + case BUS_SPI: + report = hid_register_report(hdev, HID_INPUT_REPORT, SPI_REPORT_ID, 0); + break; + default: /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ + report = hid_register_report(hdev, HID_INPUT_REPORT, + TRACKPAD_REPORT_ID, 0); + report = hid_register_report(hdev, HID_INPUT_REPORT, + DOUBLE_REPORT_ID, 0); + } } if (!report) { @@ -930,22 +1612,14 @@ static int magicmouse_probe(struct hid_device *hdev, } report->size = 6; - /* - * Some devices repond with 'invalid report id' when feature - * report switching it into multitouch mode is sent to it. - * - * This results in -EIO from the _raw low-level transport callback, - * but there seems to be no other way of switching the mode. - * Thus the super-ugly hacky success check below. 
- */ - ret = magicmouse_enable_multitouch(hdev); - if (ret != -EIO && ret < 0) { - hid_err(hdev, "unable to request touch data (%d)\n", ret); - goto err_stop_hw; - } - if (ret == -EIO && (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || - id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC)) { - schedule_delayed_work(&msc->work, msecs_to_jiffies(500)); + /* MTP devices do not need the MT enable, this is handled by the MTP driver */ + if (id->bus == BUS_HOST) + return 0; + + /* SPI devices need to watch for reset events to re-send the MT enable */ + if (id->bus == BUS_SPI) { + report = hid_register_report(hdev, HID_INPUT_REPORT, SPI_RESET_REPORT_ID, 0); + report->size = 2; } return 0; @@ -1023,10 +1697,42 @@ static const struct hid_device_id magic_mice[] = { USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC), .driver_data = 0 }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K), .driver_data = 0 }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F), .driver_data = 0 }, + { HID_SPI_DEVICE(SPI_VENDOR_ID_APPLE, HID_ANY_ID), + .driver_data = 0 }, + { HID_DEVICE(BUS_HOST, HID_GROUP_ANY, HOST_VENDOR_ID_APPLE, + HID_ANY_ID), .driver_data = 0 }, { } }; MODULE_DEVICE_TABLE(hid, magic_mice); +#ifdef CONFIG_PM +static int magicmouse_reset_resume(struct hid_device *hdev) +{ + if (hdev->bus == BUS_SPI) + return magicmouse_enable_multitouch(hdev); + + return 0; +} +#endif + static struct hid_driver magicmouse_driver = { .name = "magicmouse", .id_table = magic_mice, @@ -1037,6 +1743,10 @@ static struct hid_driver magicmouse_driver = { .event = magicmouse_event, .input_mapping = magicmouse_input_mapping, .input_configured = magicmouse_input_configured, +#ifdef CONFIG_PM + .reset_resume = magicmouse_reset_resume, +#endif + }; module_hid_driver(magicmouse_driver); diff --git a/drivers/hid/spi-hid/Kconfig b/drivers/hid/spi-hid/Kconfig new file mode 100644 index 000000000000..8e37f0fec28a --- /dev/null +++ b/drivers/hid/spi-hid/Kconfig @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "SPI HID support" + depends on SPI + +config SPI_HID_APPLE_OF + tristate "HID over SPI transport layer for Apple Silicon SoCs" + default ARCH_APPLE + depends on SPI && INPUT && OF + help + Say Y here if you use Apple Silicon based laptop. The keyboard and + touchpad are HID based devices connected via SPI. + + If unsure, say N. + + This support is also available as a module. If so, the module + will be called spi-hid-apple-of. It will also build/depend on the + module spi-hid-apple. 
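+
+# Illustrative .config outcome (not taken from a shipped defconfig):
+# enabling SPI_HID_APPLE_OF as a module pulls in the core transport
+# automatically via the defaults below:
+#
+#   CONFIG_SPI_HID_APPLE_OF=m
+#   CONFIG_SPI_HID_APPLE_CORE=m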
+
+config SPI_HID_APPLE_CORE
+	tristate
+	default y if SPI_HID_APPLE_OF=y
+	default m if SPI_HID_APPLE_OF=m
+	select HID
+	select CRC16
+
+endmenu
diff --git a/drivers/hid/spi-hid/Makefile b/drivers/hid/spi-hid/Makefile
new file mode 100644
index 000000000000..f276ee12cb94
--- /dev/null
+++ b/drivers/hid/spi-hid/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for SPI HID transport drivers
+#
+
+obj-$(CONFIG_SPI_HID_APPLE_CORE) += spi-hid-apple.o
+
+spi-hid-apple-objs = spi-hid-apple-core.o
+
+obj-$(CONFIG_SPI_HID_APPLE_OF) += spi-hid-apple-of.o
diff --git a/drivers/hid/spi-hid/spi-hid-apple-core.c b/drivers/hid/spi-hid/spi-hid-apple-core.c
new file mode 100644
index 000000000000..2ed909895391
--- /dev/null
+++ b/drivers/hid/spi-hid/spi-hid-apple-core.c
@@ -0,0 +1,1194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Apple SPI HID transport driver
+ *
+ * Copyright (C) The Asahi Linux Contributors
+ *
+ * Based on: drivers/input/applespi.c
+ *
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2018 Federico Lorenzi
+ * Copyright (c) 2017-2018 Ronald Tschalär
+ */
+
+//#define DEBUG 2
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "spi-hid-apple.h"
+
+#define SPIHID_DEF_WAIT msecs_to_jiffies(1000)
+
+#define SPIHID_MAX_INPUT_REPORT_SIZE 0x800
+
+/* support only keyboard, trackpad and management dev for now */
+#define SPIHID_MAX_DEVICES 3
+
+#define SPIHID_DEVICE_ID_MNGT 0x0
+#define SPIHID_DEVICE_ID_KBD 0x1
+#define SPIHID_DEVICE_ID_TP 0x2
+#define SPIHID_DEVICE_ID_INFO 0xd0
+
+#define SPIHID_READ_PACKET 0x20
+#define SPIHID_WRITE_PACKET 0x40
+
+#define SPIHID_DESC_MAX 512
+
+#define SPIHID_SET_LEDS 0x0151 /* caps lock */
+
+#define SPI_RW_CHG_DELAY_US 200 /* 'Inter Stage Us'? */
+
+static const u8 spi_hid_apple_booted[4] = { 0xa0, 0x80, 0x00, 0x00 };
+static const u8 spi_hid_apple_status_ok[4] = { 0xac, 0x27, 0x68, 0xd5 };
+
+struct spihid_interface {
+	struct hid_device *hid;
+	u8 *hid_desc;
+	u32 hid_desc_len;
+	u32 id;
+	unsigned int country;
+	u32 max_control_report_len;
+	u32 max_input_report_len;
+	u32 max_output_report_len;
+	u8 name[32];
+	u8 reply_buf[SPIHID_DESC_MAX];
+	u32 reply_len;
+	bool ready;
+};
+
+struct spihid_input_report {
+	u8 *buf;
+	u32 length;
+	u32 offset;
+	u8 device;
+	u8 flags;
+};
+
+struct spihid_apple {
+	struct spi_device *spidev;
+
+	struct spihid_apple_ops *ops;
+
+	struct spihid_interface mngt;
+	struct spihid_interface kbd;
+	struct spihid_interface tp;
+
+	wait_queue_head_t wait;
+	struct mutex tx_lock; //< protects against concurrent SPI writes
+
+	struct spi_message rx_msg;
+	struct spi_message tx_msg;
+	struct spi_transfer rx_transfer;
+	struct spi_transfer tx_transfer;
+	struct spi_transfer status_transfer;
+
+	u8 *rx_buf;
+	u8 *tx_buf;
+	u8 *status_buf;
+
+	u8 vendor[32];
+	u8 product[64];
+	u8 serial[32];
+
+	u32 num_devices;
+
+	u32 vendor_id;
+	u32 product_id;
+	u32 version_number;
+
+	u8 msg_id;
+
+	/* fragmented HID report */
+	struct spihid_input_report report;
+
+	/* state tracking flags */
+	bool status_booted;
+
+#ifdef IRQ_WAKE_SUPPORT
+	bool irq_wake_enabled;
+#endif
+};
+
+/**
+ * struct spihid_msg_hdr - common header of protocol messages.
+ *
+ * Each message begins with a fixed header, followed by a message-type specific
+ * payload, and ends with a 16-bit crc.
+ * Because of the varying lengths of the payload, the crc is defined at the
+ * end of each payload struct, rather than in this struct.
+ *
+ * @unknown0:	request type? output, input (0x10), feature, protocol
+ * @unknown1:	maybe report id?
+ * @unknown2:	mostly zero, in info request maybe device num
+ * @id:		incremented on each message, rolls over after 255; there is a
+ *		separate counter for each message type.
+ * @rsplen:	response length (the exact nature of this field is quite
+ *		speculative). On a request/write this is often the same as
+ *		@length, though in some cases it has been seen to be much larger
+ *		(e.g. 0x400); on a response/read this is the same as on the
+ *		request; for reads that are not responses it is 0.
+ * @length:	length of the remainder of the data in the whole message
+ *		structure (after re-assembly in case of being split over
+ *		multiple spi-packets), minus the trailing crc. The total size
+ *		of a message is therefore @length + 10.
+ */
+struct spihid_msg_hdr {
+	u8 unknown0;
+	u8 unknown1;
+	u8 unknown2;
+	u8 id;
+	__le16 rsplen;
+	__le16 length;
+};
+
+/**
+ * struct spihid_transfer_packet - a complete spi packet; always 256 bytes.
+ *	This carries the (parts of the) message in the data. But note that
+ *	this does not necessarily contain a complete message, as in some cases
+ *	(e.g. many fingers pressed) the message is split over multiple packets
+ *	(see the @offset, @remain, and @length fields). In general the data
+ *	parts of spihid_transfer_packets are concatenated until @remain is 0,
+ *	and the result is a message.
+ *
+ * @flags:	0x40 = write (to device), 0x20 = read (from device); note that
+ *		the response to a write still has 0x40.
+ * @device:	1 = keyboard, 2 = touchpad
+ * @offset:	specifies the offset of this packet's data in the complete
+ *		message; i.e. > 0 indicates this is a continuation packet (in
+ *		the second packet for a message split over multiple packets
+ *		this would then be the same as the @length in the first packet)
+ * @remain:	number of message bytes remaining in subsequent packets (in
+ *		the first packet of a message split over two packets this would
+ *		then be the same as the @length in the second packet)
+ * @length:	length of the valid data in the @data in this packet
+ * @data:	all or part of a message
+ * @crc16:	crc over this whole structure minus this @crc16 field. This
+ *		covers just this packet, even on multi-packet messages (in
+ *		contrast to the crc in the message).
+ */
+struct spihid_transfer_packet {
+	u8 flags;
+	u8 device;
+	__le16 offset;
+	__le16 remain;
+	__le16 length;
+	u8 data[246];
+	__le16 crc16;
+};
+
+/*
+ * How HID is mapped onto the protocol is not fully clear. These are the
+ * known reports/requests:
+ *
+ *			pkt.flags	pkt.dev?	msg.u0	msg.u1	msg.u2
+ * info			0x40		0xd0		0x20	0x01	0xd0
+ *
+ * info mngt:		0x40		0xd0		0x20	0x10	0x00
+ * info kbd:		0x40		0xd0		0x20	0x10	0x01
+ * info tp:		0x40		0xd0		0x20	0x10	0x02
+ *
+ * desc kbd:		0x40		0xd0		0x20	0x10	0x01
+ * desc trackpad:	0x40		0xd0		0x20	0x10	0x02
+ *
+ * mt mode:		0x40		0x02		0x52	0x02	0x00	set protocol?
+ * capslock led:	0x40		0x01		0x51	0x01	0x00	output report
+ *
+ * report kbd:		0x20		0x01		0x10	0x01	0x00	input report
+ * report tp:		0x20		0x02		0x10	0x02	0x00	input report
+ *
+ */
+
+static int spihid_apple_request(struct spihid_apple *spihid, u8 target, u8 unk0,
+				u8 unk1, u8 unk2, u16 resp_len, u8 *buf,
+				size_t len)
+{
+	struct spihid_transfer_packet *pkt;
+	struct spihid_msg_hdr *hdr;
+	u16 crc;
+	int err;
+
+	/* known reports are small enough to fit in a single packet */
+	if (len > sizeof(pkt->data) - sizeof(*hdr) - sizeof(__le16))
+		return -EINVAL;
+
+	err = mutex_lock_interruptible(&spihid->tx_lock);
+	if (err < 0)
+		return err;
+
+	pkt = (struct spihid_transfer_packet *)spihid->tx_buf;
+
+	memset(pkt, 0, sizeof(*pkt));
+	pkt->flags = SPIHID_WRITE_PACKET;
+	pkt->device = target;
+	pkt->length = cpu_to_le16(sizeof(*hdr) + len + sizeof(__le16));
+
+	hdr = (struct spihid_msg_hdr *)&pkt->data[0];
+	hdr->unknown0 = unk0;
+	hdr->unknown1 = unk1;
+	hdr->unknown2 = unk2;
+	hdr->id = spihid->msg_id++;
+	hdr->rsplen = cpu_to_le16(resp_len);
+	hdr->length = cpu_to_le16(len);
+
+	if (len)
+		memcpy(pkt->data + sizeof(*hdr), buf, len);
+	crc = crc16(0, &pkt->data[0], sizeof(*hdr) + len);
+	put_unaligned_le16(crc, pkt->data + sizeof(*hdr) + len);
+
+	pkt->crc16 = cpu_to_le16(crc16(0, spihid->tx_buf,
+				       offsetof(struct spihid_transfer_packet, crc16)));
+
+	memset(spihid->status_buf, 0, sizeof(spi_hid_apple_status_ok));
+
+	err = spi_sync(spihid->spidev, &spihid->tx_msg);
+
+	if (memcmp(spihid->status_buf, spi_hid_apple_status_ok,
+		   sizeof(spi_hid_apple_status_ok))) {
+		u8 *b = spihid->status_buf;
+
+		dev_warn_ratelimited(&spihid->spidev->dev,
+				     "status message mismatch: %02x %02x %02x %02x\n",
+				     b[0], b[1], b[2], b[3]);
+	}
+	mutex_unlock(&spihid->tx_lock);
+	if (err < 0)
+		return err;
+
+	return (int)len;
+}
+
+static struct spihid_apple *spihid_get_data(struct spihid_interface *idev)
+{
+	switch (idev->id) {
+	case SPIHID_DEVICE_ID_KBD:
+		return container_of(idev, struct spihid_apple, kbd);
+	case SPIHID_DEVICE_ID_TP:
+		return container_of(idev, struct spihid_apple, tp);
+	default:
+		return NULL;
+	}
+}
+
+static int apple_ll_start(struct hid_device *hdev)
+{
+	/* no-op, the SPI transport is already set up */
+	return 0;
+}
+
+static void apple_ll_stop(struct hid_device *hdev)
+{
+	/* no-op, devices will be destroyed on driver destruction */
+}
+
+static int apple_ll_open(struct hid_device *hdev)
+{
+	struct spihid_apple *spihid;
+	struct spihid_interface *idev = hdev->driver_data;
+
+	if (idev->hid_desc_len == 0) {
+		spihid = spihid_get_data(idev);
+		dev_warn(&spihid->spidev->dev,
+			 "HID descriptor missing for dev %u", idev->id);
+	} else {
+		idev->ready = true;
+	}
+
+	return 0;
+}
+
+static void apple_ll_close(struct hid_device *hdev)
+{
+	struct spihid_interface *idev = hdev->driver_data;
+
+	idev->ready = false;
+}
+
+static int apple_ll_parse(struct hid_device *hdev)
+{
+	struct spihid_interface *idev = hdev->driver_data;
+
+	return hid_parse_report(hdev, idev->hid_desc, idev->hid_desc_len);
+}
+
+static int apple_ll_raw_request(struct hid_device *hdev,
+				unsigned char reportnum, __u8 *buf, size_t len,
+				unsigned char rtype, int reqtype)
+{
+	struct spihid_interface *idev = hdev->driver_data;
+	struct spihid_apple *spihid = spihid_get_data(idev);
+	int ret;
+
+	dev_dbg(&spihid->spidev->dev,
+		"apple_ll_raw_request: device:%u reportnum:%hhu rtype:%hhu",
+		idev->id, reportnum, rtype);
+
+	switch (reqtype) {
+	case HID_REQ_GET_REPORT:
+		if (rtype != HID_FEATURE_REPORT)
+			return -EINVAL;
+
+		idev->reply_len =
0; + ret = spihid_apple_request(spihid, idev->id, 0x32, reportnum, 0x00, len, NULL, 0); + if (ret < 0) + return ret; + + ret = wait_event_interruptible_timeout(spihid->wait, idev->reply_len, + SPIHID_DEF_WAIT); + if (ret == 0) + ret = -ETIMEDOUT; + if (ret < 0) { + dev_err(&spihid->spidev->dev, "waiting for get report failed: %d", ret); + return ret; + } + memcpy(buf, idev->reply_buf, max_t(size_t, len, idev->reply_len)); + return idev->reply_len; + + case HID_REQ_SET_REPORT: + if (buf[0] != reportnum) + return -EINVAL; + if (reportnum != idev->id) { + dev_warn(&spihid->spidev->dev, + "device:%u reportnum:" + "%hhu mismatch", + idev->id, reportnum); + return -EINVAL; + } + return spihid_apple_request(spihid, idev->id, 0x52, reportnum, 0x00, 2, buf, len); + default: + return -EIO; + } +} + +static int apple_ll_output_report(struct hid_device *hdev, __u8 *buf, + size_t len) +{ + struct spihid_interface *idev = hdev->driver_data; + struct spihid_apple *spihid = spihid_get_data(idev); + if (!spihid) + return -1; + + dev_dbg(&spihid->spidev->dev, + "apple_ll_output_report: device:%u len:%zu:", + idev->id, len); + // second idev->id should maybe be buf[0]? + return spihid_apple_request(spihid, idev->id, 0x51, idev->id, 0x00, 0, buf, len); +} + +static struct hid_ll_driver apple_hid_ll = { + .start = &apple_ll_start, + .stop = &apple_ll_stop, + .open = &apple_ll_open, + .close = &apple_ll_close, + .parse = &apple_ll_parse, + .raw_request = &apple_ll_raw_request, + .output_report = &apple_ll_output_report, + .max_buffer_size = SPIHID_MAX_INPUT_REPORT_SIZE, +}; + +static struct spihid_interface *spihid_get_iface(struct spihid_apple *spihid, + u32 iface) +{ + switch (iface) { + case SPIHID_DEVICE_ID_MNGT: + return &spihid->mngt; + case SPIHID_DEVICE_ID_KBD: + return &spihid->kbd; + case SPIHID_DEVICE_ID_TP: + return &spihid->tp; + default: + return NULL; + } +} + +static int spihid_verify_msg(struct spihid_apple *spihid, u8 *buf, size_t len) +{ + u16 msg_crc, crc; + struct device *dev = &spihid->spidev->dev; + + crc = crc16(0, buf, len - sizeof(__le16)); + msg_crc = get_unaligned_le16(buf + len - sizeof(__le16)); + if (crc != msg_crc) { + dev_warn_ratelimited(dev, "Read message crc mismatch\n"); + return 0; + } + return 1; +} + +static bool spihid_status_report(struct spihid_apple *spihid, u8 *pl, + size_t len) +{ + struct device *dev = &spihid->spidev->dev; + dev_dbg(dev, "%s: len: %zu", __func__, len); + if (len == 5 && pl[0] == 0xe0) + return true; + + return false; +} + +static bool spihid_process_input_report(struct spihid_apple *spihid, u32 device, + struct spihid_msg_hdr *hdr, u8 *payload, + size_t len) +{ + //dev_dbg(&spihid>spidev->dev, "input report: req:%hx iface:%u ", hdr->unknown0, device); + if (hdr->unknown0 != 0x10) + return false; + + /* HID device as well but Vendor usage only, handle it internally for now */ + if (device == 0) { + if (hdr->unknown1 == 0xe0) { + return spihid_status_report(spihid, payload, len); + } + } else if (device < SPIHID_MAX_DEVICES) { + struct spihid_interface *iface = + spihid_get_iface(spihid, device); + if (iface && iface->hid && iface->ready) { + hid_input_report(iface->hid, HID_INPUT_REPORT, payload, + len, 1); + return true; + } + } else + dev_dbg(&spihid->spidev->dev, + "unexpected iface:%u for input report", device); + + return false; +} + +struct spihid_device_info { + __le16 u0[2]; + __le16 num_devices; + __le16 vendor_id; + __le16 product_id; + __le16 version_number; + __le16 vendor_str[2]; //< offset and string length + __le16 product_str[2]; 
//< offset and string length + __le16 serial_str[2]; //< offset and string length +}; + +static bool spihid_process_device_info(struct spihid_apple *spihid, u32 iface, + u8 *payload, size_t len) +{ + struct device *dev = &spihid->spidev->dev; + + if (iface != SPIHID_DEVICE_ID_INFO) + return false; + + if (spihid->vendor_id == 0 && + len >= sizeof(struct spihid_device_info)) { + struct spihid_device_info *info = + (struct spihid_device_info *)payload; + u16 voff, vlen, poff, plen, soff, slen; + u32 num_devices; + + num_devices = __le16_to_cpu(info->num_devices); + + if (num_devices < SPIHID_MAX_DEVICES) { + dev_err(dev, + "Device info reports %u devices, expecting at least 3", + num_devices); + return false; + } + spihid->num_devices = num_devices; + + if (spihid->num_devices > SPIHID_MAX_DEVICES) { + dev_info( + dev, + "limiting the number of devices to mngt, kbd and mouse"); + spihid->num_devices = SPIHID_MAX_DEVICES; + } + + spihid->vendor_id = __le16_to_cpu(info->vendor_id); + spihid->product_id = __le16_to_cpu(info->product_id); + spihid->version_number = __le16_to_cpu(info->version_number); + + voff = __le16_to_cpu(info->vendor_str[0]); + vlen = __le16_to_cpu(info->vendor_str[1]); + + if (voff < len && vlen <= len - voff && + vlen < sizeof(spihid->vendor)) { + memcpy(spihid->vendor, payload + voff, vlen); + spihid->vendor[vlen] = '\0'; + } + + poff = __le16_to_cpu(info->product_str[0]); + plen = __le16_to_cpu(info->product_str[1]); + + if (poff < len && plen <= len - poff && + plen < sizeof(spihid->product)) { + memcpy(spihid->product, payload + poff, plen); + spihid->product[plen] = '\0'; + } + + soff = __le16_to_cpu(info->serial_str[0]); + slen = __le16_to_cpu(info->serial_str[1]); + + if (soff < len && slen <= len - soff && + slen < sizeof(spihid->serial)) { + memcpy(spihid->vendor, payload + soff, slen); + spihid->serial[slen] = '\0'; + } + + wake_up_interruptible(&spihid->wait); + } + return true; +} + +struct spihid_iface_info { + u8 u_0; + u8 interface_num; + u8 u_2; + u8 u_3; + u8 u_4; + u8 country_code; + __le16 max_input_report_len; + __le16 max_output_report_len; + __le16 max_control_report_len; + __le16 name_offset; + __le16 name_length; +}; + +static bool spihid_process_iface_info(struct spihid_apple *spihid, u32 num, + u8 *payload, size_t len) +{ + struct spihid_iface_info *info; + struct spihid_interface *iface = spihid_get_iface(spihid, num); + u32 name_off, name_len; + + if (!iface) + return false; + + if (!iface->max_input_report_len) { + if (len < sizeof(*info)) + return false; + + info = (struct spihid_iface_info *)payload; + + iface->max_input_report_len = + le16_to_cpu(info->max_input_report_len); + iface->max_output_report_len = + le16_to_cpu(info->max_output_report_len); + iface->max_control_report_len = + le16_to_cpu(info->max_control_report_len); + iface->country = info->country_code; + + name_off = le16_to_cpu(info->name_offset); + name_len = le16_to_cpu(info->name_length); + + if (name_off < len && name_len <= len - name_off && + name_len < sizeof(iface->name)) { + memcpy(iface->name, payload + name_off, name_len); + iface->name[name_len] = '\0'; + } + + dev_dbg(&spihid->spidev->dev, "Info for %s, country code: 0x%x", + iface->name, iface->country); + + wake_up_interruptible(&spihid->wait); + } + + return true; +} + +static int spihid_register_hid_device(struct spihid_apple *spihid, + struct spihid_interface *idev, u8 device); + +static bool spihid_process_iface_hid_report_desc(struct spihid_apple *spihid, + u32 num, u8 *payload, + size_t len) +{ + struct 
spihid_interface *iface = spihid_get_iface(spihid, num); + + if (!iface) + return false; + + if (iface->hid_desc_len == 0) { + if (len > SPIHID_DESC_MAX) + return false; + memcpy(iface->hid_desc, payload, len); + iface->hid_desc_len = len; + + /* do not register the mngt iface as HID device */ + if (num > 0) + spihid_register_hid_device(spihid, iface, num); + + wake_up_interruptible(&spihid->wait); + } + return true; +} + +static bool spihid_process_iface_get_report(struct spihid_apple *spihid, + u32 device, u8 report, + u8 *payload, size_t len) +{ + struct spihid_interface *iface = spihid_get_iface(spihid, device); + + if (!iface) + return false; + + if (len > sizeof(iface->reply_buf) || len < 1) + return false; + + memcpy(iface->reply_buf, payload, len); + iface->reply_len = len; + + wake_up_interruptible(&spihid->wait); + + return true; +} + +static bool spihid_process_response(struct spihid_apple *spihid, u32 device, + struct spihid_msg_hdr *hdr, u8 *payload, + size_t len) +{ + if (hdr->unknown0 == 0x20) { + switch (hdr->unknown1) { + case 0x01: + return spihid_process_device_info(spihid, hdr->unknown2, + payload, len); + case 0x02: + return spihid_process_iface_info(spihid, hdr->unknown2, + payload, len); + case 0x10: + return spihid_process_iface_hid_report_desc( + spihid, hdr->unknown2, payload, len); + default: + break; + } + } + + if (hdr->unknown0 == 0x32) { + return spihid_process_iface_get_report(spihid, device, hdr->unknown1, payload, len); + } + + return false; +} + +static void spihid_process_message(struct spihid_apple *spihid, u8 *data, + size_t length, u8 device, u8 flags) +{ + struct device *dev = &spihid->spidev->dev; + struct spihid_msg_hdr *hdr; + bool handled = false; + size_t payload_len; + u8 *payload; + + if (!spihid_verify_msg(spihid, data, length)) + return; + + hdr = (struct spihid_msg_hdr *)data; + payload_len = le16_to_cpu(hdr->length); + + if (payload_len == 0 || + (payload_len + sizeof(struct spihid_msg_hdr) + 2) > length) + return; + + payload = data + sizeof(struct spihid_msg_hdr); + + switch (flags) { + case SPIHID_READ_PACKET: + handled = spihid_process_input_report(spihid, device, hdr, + payload, payload_len); + break; + case SPIHID_WRITE_PACKET: + handled = spihid_process_response(spihid, device, hdr, payload, + payload_len); + break; + default: + break; + } + +#if defined(DEBUG) && DEBUG > 1 + { + dev_dbg(dev, + "R msg: req:%02hhx rep:%02hhx dev:%02hhx id:%hu len:%hu\n", + hdr->unknown0, hdr->unknown1, hdr->unknown2, hdr->id, + hdr->length); + print_hex_dump_debug("spihid msg: ", DUMP_PREFIX_OFFSET, 16, 1, + payload, le16_to_cpu(hdr->length), true); + } +#else + if (!handled) { + dev_dbg(dev, + "R unhandled msg: req:%02hhx rep:%02hhx dev:%02hhx id:%hu len:%hu\n", + hdr->unknown0, hdr->unknown1, hdr->unknown2, hdr->id, + hdr->length); + print_hex_dump_debug("spihid msg: ", DUMP_PREFIX_OFFSET, 16, 1, + payload, le16_to_cpu(hdr->length), true); + } +#endif +} + +static void spihid_assemble_message(struct spihid_apple *spihid, + struct spihid_transfer_packet *pkt) +{ + size_t length, offset, remain; + struct device *dev = &spihid->spidev->dev; + struct spihid_input_report *rep = &spihid->report; + + length = le16_to_cpu(pkt->length); + remain = le16_to_cpu(pkt->remain); + offset = le16_to_cpu(pkt->offset); + + if (offset + length + remain > U16_MAX) { + return; + } + + if (pkt->device != rep->device || pkt->flags != rep->flags || + offset != rep->offset) { + rep->device = 0; + rep->flags = 0; + rep->offset = 0; + rep->length = 0; + } + + if (offset == 0) 
{ + if (rep->offset != 0) { + dev_warn(dev, "incomplete report off:%u len:%u\n", + rep->offset, rep->length); + } + memcpy(rep->buf, pkt->data, length); + rep->offset = length; + rep->length = length + remain; + rep->device = pkt->device; + rep->flags = pkt->flags; + } else if (offset == rep->offset) { + if (offset + length + remain != rep->length) { + dev_warn(dev, "incomplete report off:%u len:%u\n", + rep->offset, rep->length); + return; + } + memcpy(rep->buf + offset, pkt->data, length); + rep->offset += length; + + if (rep->offset == rep->length) { + spihid_process_message(spihid, rep->buf, rep->length, + rep->device, rep->flags); + rep->device = 0; + rep->flags = 0; + rep->offset = 0; + rep->length = 0; + } + } +} + +static void spihid_process_read(struct spihid_apple *spihid) +{ + u16 crc; + size_t length; + struct device *dev = &spihid->spidev->dev; + struct spihid_transfer_packet *pkt; + + pkt = (struct spihid_transfer_packet *)spihid->rx_buf; + + /* check transfer packet crc */ + crc = crc16(0, spihid->rx_buf, + offsetof(struct spihid_transfer_packet, crc16)); + if (crc != le16_to_cpu(pkt->crc16)) { + dev_warn_ratelimited(dev, "Read packet crc mismatch\n"); + return; + } + + length = le16_to_cpu(pkt->length); + + if (length < sizeof(struct spihid_msg_hdr) + 2) { + if (length == sizeof(spi_hid_apple_booted) && + !memcmp(pkt->data, spi_hid_apple_booted, length)) { + if (!spihid->status_booted) { + spihid->status_booted = true; + wake_up_interruptible(&spihid->wait); + } + } else { + dev_info(dev, "R short packet: len:%zu\n", length); + print_hex_dump(KERN_INFO, "spihid pkt:", + DUMP_PREFIX_OFFSET, 16, 1, pkt->data, + length, false); + } + return; + } + +#if defined(DEBUG) && DEBUG > 1 + dev_dbg(dev, + "R pkt: flags:%02hhx dev:%02hhx off:%hu remain:%hu, len:%zu\n", + pkt->flags, pkt->device, pkt->offset, pkt->remain, length); +#if defined(DEBUG) && DEBUG > 2 + print_hex_dump_debug("spihid pkt: ", DUMP_PREFIX_OFFSET, 16, 1, + spihid->rx_buf, + sizeof(struct spihid_transfer_packet), true); +#endif +#endif + + if (length > sizeof(pkt->data)) { + dev_warn_ratelimited(dev, "Invalid pkt len:%zu\n", length); + return; + } + + /* short message */ + if (pkt->offset == 0 && pkt->remain == 0) { + spihid_process_message(spihid, pkt->data, length, pkt->device, + pkt->flags); + } else { + spihid_assemble_message(spihid, pkt); + } +} + +static void spihid_read_packet_sync(struct spihid_apple *spihid) +{ + int err; + + err = spi_sync(spihid->spidev, &spihid->rx_msg); + if (!err) { + spihid_process_read(spihid); + } else { + dev_warn(&spihid->spidev->dev, "RX failed: %d\n", err); + } +} + +irqreturn_t spihid_apple_core_irq(int irq, void *data) +{ + struct spi_device *spi = data; + struct spihid_apple *spihid = spi_get_drvdata(spi); + + spihid_read_packet_sync(spihid); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(spihid_apple_core_irq); + +static void spihid_apple_setup_spi_msgs(struct spihid_apple *spihid) +{ + memset(&spihid->rx_transfer, 0, sizeof(spihid->rx_transfer)); + + spihid->rx_transfer.rx_buf = spihid->rx_buf; + spihid->rx_transfer.len = sizeof(struct spihid_transfer_packet); + + spi_message_init(&spihid->rx_msg); + spi_message_add_tail(&spihid->rx_transfer, &spihid->rx_msg); + + memset(&spihid->tx_transfer, 0, sizeof(spihid->tx_transfer)); + memset(&spihid->status_transfer, 0, sizeof(spihid->status_transfer)); + + spihid->tx_transfer.tx_buf = spihid->tx_buf; + spihid->tx_transfer.len = sizeof(struct spihid_transfer_packet); + spihid->tx_transfer.delay.unit = SPI_DELAY_UNIT_USECS;
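+ /*
+  * tx_msg queues a read of the status bytes right behind the command
+  * transfer; this inter-transfer delay (SPI_RW_CHG_DELAY_US) presumably
+  * gives the device time to turn the bus around before it answers.
+  */
+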
spihid->tx_transfer.delay.value = SPI_RW_CHG_DELAY_US; + + spihid->status_transfer.rx_buf = spihid->status_buf; + spihid->status_transfer.len = sizeof(spi_hid_apple_status_ok); + + spi_message_init(&spihid->tx_msg); + spi_message_add_tail(&spihid->tx_transfer, &spihid->tx_msg); + spi_message_add_tail(&spihid->status_transfer, &spihid->tx_msg); +} + +static int spihid_apple_setup_spi(struct spihid_apple *spihid) +{ + spihid_apple_setup_spi_msgs(spihid); + + return spihid->ops->power_on(spihid->ops); +} + +static int spihid_register_hid_device(struct spihid_apple *spihid, + struct spihid_interface *iface, u8 device) +{ + int ret; + char *suffix; + struct hid_device *hid; + + iface->id = device; + + hid = hid_allocate_device(); + if (IS_ERR(hid)) + return PTR_ERR(hid); + + /* + * Use 'Apple SPI Keyboard' and 'Apple SPI Trackpad' as input device + * names. The device names need to be distinct since at least Kwin uses + * the triple Vendor ID, Product ID, Name to identify devices. + */ + snprintf(hid->name, sizeof(hid->name), "Apple SPI %s", iface->name); + // strip ' / Boot' suffix from the name + suffix = strstr(hid->name, " / Boot"); + if (suffix) + suffix[0] = '\0'; + snprintf(hid->phys, sizeof(hid->phys), "%s (%hhx)", + dev_name(&spihid->spidev->dev), device); + strscpy(hid->uniq, spihid->serial, sizeof(hid->uniq)); + + hid->ll_driver = &apple_hid_ll; + hid->bus = BUS_SPI; + hid->vendor = spihid->vendor_id; + hid->product = spihid->product_id; + hid->version = spihid->version_number; + + if (device == SPIHID_DEVICE_ID_KBD) + hid->type = HID_TYPE_SPI_KEYBOARD; + else if (device == SPIHID_DEVICE_ID_TP) + hid->type = HID_TYPE_SPI_MOUSE; + + hid->country = iface->country; + hid->dev.parent = &spihid->spidev->dev; + hid->driver_data = iface; + + ret = hid_add_device(hid); + if (ret < 0) { + hid_destroy_device(hid); + dev_warn(&spihid->spidev->dev, + "Failed to register hid device %hhu\n", device); + return ret; + } + + iface->hid = hid; + + return 0; +} + +static void spihid_destroy_hid_device(struct spihid_interface *iface) +{ + if (iface->hid) { + hid_destroy_device(iface->hid); + iface->hid = NULL; + } + iface->ready = false; +} + +int spihid_apple_core_probe(struct spi_device *spi, struct spihid_apple_ops *ops) +{ + struct device *dev = &spi->dev; + struct spihid_apple *spihid; + int err, i; + + if (!ops || !ops->power_on || !ops->power_off || !ops->enable_irq || !ops->disable_irq) + return -EINVAL; + + spihid = devm_kzalloc(dev, sizeof(*spihid), GFP_KERNEL); + if (!spihid) + return -ENOMEM; + + spihid->ops = ops; + spihid->spidev = spi; + + // init spi + spi_set_drvdata(spi, spihid); + + /* + * allocate SPI buffers + * Overallocate the receive buffer since it is passed directly into + * hid_input_report / hid_report_raw_event. The latter expects the buffer + * to be HID_MAX_BUFFER_SIZE (16k) or hid_ll_driver.max_buffer_size if + * set.
+ */ + spihid->rx_buf = devm_kmalloc( + &spi->dev, SPIHID_MAX_INPUT_REPORT_SIZE, GFP_KERNEL); + spihid->tx_buf = devm_kmalloc( + &spi->dev, sizeof(struct spihid_transfer_packet), GFP_KERNEL); + spihid->status_buf = devm_kmalloc( + &spi->dev, sizeof(spi_hid_apple_status_ok), GFP_KERNEL); + + if (!spihid->rx_buf || !spihid->tx_buf || !spihid->status_buf) + return -ENOMEM; + + spihid->report.buf = + devm_kmalloc(dev, SPIHID_MAX_INPUT_REPORT_SIZE, GFP_KERNEL); + + spihid->kbd.hid_desc = devm_kmalloc(dev, SPIHID_DESC_MAX, GFP_KERNEL); + spihid->tp.hid_desc = devm_kmalloc(dev, SPIHID_DESC_MAX, GFP_KERNEL); + + if (!spihid->report.buf || !spihid->kbd.hid_desc || + !spihid->tp.hid_desc) + return -ENOMEM; + + init_waitqueue_head(&spihid->wait); + + mutex_init(&spihid->tx_lock); + + /* Init spi transfer buffers and power device on */ + err = spihid_apple_setup_spi(spihid); + if (err < 0) + goto error; + + /* enable HID irq */ + spihid->ops->enable_irq(spihid->ops); + + // wait for boot message + err = wait_event_interruptible_timeout(spihid->wait, + spihid->status_booted, + msecs_to_jiffies(1000)); + if (err == 0) + err = -ENODEV; + if (err < 0) { + dev_err(dev, "waiting for device boot failed: %d\n", err); + goto error; + } + + /* request device information */ + dev_dbg(dev, "request device info\n"); + spihid_apple_request(spihid, 0xd0, 0x20, 0x01, 0xd0, 0, NULL, 0); + err = wait_event_interruptible_timeout(spihid->wait, spihid->vendor_id, + SPIHID_DEF_WAIT); + if (err == 0) + err = -ENODEV; + if (err < 0) { + dev_err(dev, "waiting for device info failed: %d\n", err); + goto error; + } + + /* request interface information */ + for (i = 0; i < spihid->num_devices; i++) { + struct spihid_interface *iface = spihid_get_iface(spihid, i); + + if (!iface) + continue; + dev_dbg(dev, "request interface info 0x%02x\n", i); + spihid_apple_request(spihid, 0xd0, 0x20, 0x02, i, + SPIHID_DESC_MAX, NULL, 0); + err = wait_event_interruptible_timeout( + spihid->wait, iface->max_input_report_len, + SPIHID_DEF_WAIT); + } + + /* request HID report descriptors */ + for (i = 1; i < spihid->num_devices; i++) { + struct spihid_interface *iface = spihid_get_iface(spihid, i); + + if (!iface) + continue; + dev_dbg(dev, "request hid report desc 0x%02x\n", i); + spihid_apple_request(spihid, 0xd0, 0x20, 0x10, i, + SPIHID_DESC_MAX, NULL, 0); + wait_event_interruptible_timeout( + spihid->wait, iface->hid_desc_len, SPIHID_DEF_WAIT); + } + + return 0; +error: + return err; +} +EXPORT_SYMBOL_GPL(spihid_apple_core_probe); + +void spihid_apple_core_remove(struct spi_device *spi) +{ + struct spihid_apple *spihid = spi_get_drvdata(spi); + + /* destroy input devices */ + spihid_destroy_hid_device(&spihid->tp); + spihid_destroy_hid_device(&spihid->kbd); + + /* disable irq */ + spihid->ops->disable_irq(spihid->ops); + + /* power SPI device down */ + spihid->ops->power_off(spihid->ops); +} +EXPORT_SYMBOL_GPL(spihid_apple_core_remove); + +void spihid_apple_core_shutdown(struct spi_device *spi) +{ + struct spihid_apple *spihid = spi_get_drvdata(spi); + + /* disable irq */ + spihid->ops->disable_irq(spihid->ops); + + /* power SPI device down */ + spihid->ops->power_off(spihid->ops); +} +EXPORT_SYMBOL_GPL(spihid_apple_core_shutdown); + +#ifdef CONFIG_PM_SLEEP +static int spihid_apple_core_suspend(struct device *dev) +{ + int ret; +#ifdef IRQ_WAKE_SUPPORT + int wake_status; +#endif + struct spihid_apple *spihid = spi_get_drvdata(to_spi_device(dev)); + + if (spihid->tp.hid) { + ret = hid_driver_suspend(spihid->tp.hid, PMSG_SUSPEND); + if (ret < 0) + return
ret; + } + + if (spihid->kbd.hid) { + ret = hid_driver_suspend(spihid->kbd.hid, PMSG_SUSPEND); + if (ret < 0) { + if (spihid->tp.hid) + hid_driver_resume(spihid->tp.hid); + return ret; + } + } + + /* Save some power */ + spihid->ops->disable_irq(spihid->ops); + +#ifdef IRQ_WAKE_SUPPORT + if (device_may_wakeup(dev)) { + wake_status = spihid->ops->enable_irq_wake(spihid->ops); + if (!wake_status) + spihid->irq_wake_enabled = true; + else + dev_warn(dev, "Failed to enable irq wake: %d\n", + wake_status); + } else { + spihid->ops->power_off(spihid->ops); + } +#else + spihid->ops->power_off(spihid->ops); +#endif + + return 0; +} + +static int spihid_apple_core_resume(struct device *dev) +{ + int ret_tp = 0, ret_kbd = 0; + struct spihid_apple *spihid = spi_get_drvdata(to_spi_device(dev)); +#ifdef IRQ_WAKE_SUPPORT + int wake_status; + + if (!device_may_wakeup(dev)) { + spihid->ops->power_on(spihid->ops); + } else if (spihid->irq_wake_enabled) { + wake_status = spihid->ops->disable_irq_wake(spihid->ops); + if (!wake_status) + spihid->irq_wake_enabled = false; + else + dev_warn(dev, "Failed to disable irq wake: %d\n", + wake_status); + } +#endif + + spihid->ops->enable_irq(spihid->ops); + spihid->ops->power_on(spihid->ops); + + if (spihid->tp.hid) + ret_tp = hid_driver_reset_resume(spihid->tp.hid); + if (spihid->kbd.hid) + ret_kbd = hid_driver_reset_resume(spihid->kbd.hid); + + if (ret_tp < 0) + return ret_tp; + + return ret_kbd; +} +#endif + +const struct dev_pm_ops spihid_apple_core_pm = { + SET_SYSTEM_SLEEP_PM_OPS(spihid_apple_core_suspend, + spihid_apple_core_resume) +}; +EXPORT_SYMBOL_GPL(spihid_apple_core_pm); + +MODULE_DESCRIPTION("Apple SPI HID transport driver"); +MODULE_AUTHOR("Janne Grunau "); +MODULE_LICENSE("GPL"); diff --git a/drivers/hid/spi-hid/spi-hid-apple-of.c b/drivers/hid/spi-hid/spi-hid-apple-of.c new file mode 100644 index 000000000000..b631212b836d --- /dev/null +++ b/drivers/hid/spi-hid/spi-hid-apple-of.c @@ -0,0 +1,153 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Apple SPI HID transport driver - Open Firmware + * + * Copyright (C) The Asahi Linux Contributors + */ + +#include +#include +#include +#include + +#include "spi-hid-apple.h" + + +struct spihid_apple_of { + struct spihid_apple_ops ops; + + struct gpio_desc *enable_gpio; + int irq; +}; + +static int spihid_apple_of_power_on(struct spihid_apple_ops *ops) +{ + struct spihid_apple_of *sh_of = container_of(ops, struct spihid_apple_of, ops); + + /* reset the controller on boot */ + gpiod_direction_output(sh_of->enable_gpio, 1); + msleep(5); + gpiod_direction_output(sh_of->enable_gpio, 0); + msleep(5); + /* turn SPI device on */ + gpiod_direction_output(sh_of->enable_gpio, 1); + msleep(50); + + return 0; +} + +static int spihid_apple_of_power_off(struct spihid_apple_ops *ops) +{ + struct spihid_apple_of *sh_of = container_of(ops, struct spihid_apple_of, ops); + + /* turn SPI device off */ + gpiod_direction_output(sh_of->enable_gpio, 0); + + return 0; +} + +static int spihid_apple_of_enable_irq(struct spihid_apple_ops *ops) +{ + struct spihid_apple_of *sh_of = container_of(ops, struct spihid_apple_of, ops); + + enable_irq(sh_of->irq); + + return 0; +} + +static int spihid_apple_of_disable_irq(struct spihid_apple_ops *ops) +{ + struct spihid_apple_of *sh_of = container_of(ops, struct spihid_apple_of, ops); + + disable_irq(sh_of->irq); + + return 0; +} + +static int spihid_apple_of_enable_irq_wake(struct spihid_apple_ops *ops) +{ + struct spihid_apple_of *sh_of = container_of(ops, struct spihid_apple_of, ops); + + 
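/* arm the HID irq as a system wakeup source; see the suspend path */
+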
return enable_irq_wake(sh_of->irq); +} + +static int spihid_apple_of_disable_irq_wake(struct spihid_apple_ops *ops) +{ + struct spihid_apple_of *sh_of = container_of(ops, struct spihid_apple_of, ops); + + return disable_irq_wake(sh_of->irq); +} + +static int spihid_apple_of_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct spihid_apple_of *spihid_of; + int err; + + spihid_of = devm_kzalloc(dev, sizeof(*spihid_of), GFP_KERNEL); + if (!spihid_of) + return -ENOMEM; + + spihid_of->ops.power_on = spihid_apple_of_power_on; + spihid_of->ops.power_off = spihid_apple_of_power_off; + spihid_of->ops.enable_irq = spihid_apple_of_enable_irq; + spihid_of->ops.disable_irq = spihid_apple_of_disable_irq; + spihid_of->ops.enable_irq_wake = spihid_apple_of_enable_irq_wake; + spihid_of->ops.disable_irq_wake = spihid_apple_of_disable_irq_wake; + + spihid_of->enable_gpio = devm_gpiod_get_index(dev, "spien", 0, 0); + if (IS_ERR(spihid_of->enable_gpio)) { + err = PTR_ERR(spihid_of->enable_gpio); + dev_err(dev, "failed to get 'spien' gpio pin: %d\n", err); + return err; + } + + spihid_of->irq = of_irq_get(dev->of_node, 0); + if (spihid_of->irq < 0) { + err = spihid_of->irq; + dev_err(dev, "failed to get 'extended-irq': %d\n", err); + return err; + } + err = devm_request_threaded_irq(dev, spihid_of->irq, NULL, + spihid_apple_core_irq, IRQF_ONESHOT | IRQF_NO_AUTOEN, + "spi-hid-apple-irq", spi); + if (err < 0) { + dev_err(dev, "failed to request extended-irq %d: %d\n", + spihid_of->irq, err); + return err; + } + + return spihid_apple_core_probe(spi, &spihid_of->ops); +} + +static const struct of_device_id spihid_apple_of_match[] = { + { .compatible = "apple,spi-hid-transport" }, + {}, +}; +MODULE_DEVICE_TABLE(of, spihid_apple_of_match); + +static const struct spi_device_id spihid_apple_of_id[] = { + { "spi-hid-transport", 0 }, + {} +}; +MODULE_DEVICE_TABLE(spi, spihid_apple_of_id); + +static struct spi_driver spihid_apple_of_driver = { + .driver = { + .name = "spi-hid-apple-of", + .pm = &spihid_apple_core_pm, + .of_match_table = of_match_ptr(spihid_apple_of_match), + }, + + .id_table = spihid_apple_of_id, + .probe = spihid_apple_of_probe, + .remove = spihid_apple_core_remove, + .shutdown = spihid_apple_core_shutdown, +}; + +module_spi_driver(spihid_apple_of_driver); + +MODULE_DESCRIPTION("Apple SPI HID transport driver for OpenFirmware systems"); +MODULE_AUTHOR("Janne Grunau "); +MODULE_LICENSE("GPL"); diff --git a/drivers/hid/spi-hid/spi-hid-apple.h b/drivers/hid/spi-hid/spi-hid-apple.h new file mode 100644 index 000000000000..9abecd1ba780 --- /dev/null +++ b/drivers/hid/spi-hid/spi-hid-apple.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ + +#ifndef SPI_HID_APPLE_H +#define SPI_HID_APPLE_H + +#include +#include + +/** + * struct spihid_apple_ops - Ops to control the device from the core driver. + * + * @power_on: reset and power the device on. + * @power_off: power the device off. + * @enable_irq: enable irq or ACPI gpe. + * @disable_irq: disable irq or ACPI gpe.
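+ * @enable_irq_wake: arm the irq as a wakeup source for system suspend.
+ * @disable_irq_wake: disarm the irq wakeup source on resume.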
+ */ + +struct spihid_apple_ops { + int (*power_on)(struct spihid_apple_ops *ops); + int (*power_off)(struct spihid_apple_ops *ops); + int (*enable_irq)(struct spihid_apple_ops *ops); + int (*disable_irq)(struct spihid_apple_ops *ops); + int (*enable_irq_wake)(struct spihid_apple_ops *ops); + int (*disable_irq_wake)(struct spihid_apple_ops *ops); +}; + +irqreturn_t spihid_apple_core_irq(int irq, void *data); + +int spihid_apple_core_probe(struct spi_device *spi, struct spihid_apple_ops *ops); +void spihid_apple_core_remove(struct spi_device *spi); +void spihid_apple_core_shutdown(struct spi_device *spi); + +extern const struct dev_pm_ops spihid_apple_core_pm; + +#endif /* SPI_HID_APPLE_H */ diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index fc6d6a9053ce..698f44794453 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -6,6 +6,7 @@ * * Copyright (C) 2007 Nicolas Boichat * Copyright (C) 2010 Henrik Rydberg + * Copyright (C) 2019 Paul Pawlowski * * Based on hdaps.c driver: * Copyright (C) 2005 Robert Love @@ -18,7 +19,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include @@ -35,12 +36,24 @@ #include /* data port used by Apple SMC */ -#define APPLESMC_DATA_PORT 0x300 +#define APPLESMC_DATA_PORT 0 /* command/status port used by Apple SMC */ -#define APPLESMC_CMD_PORT 0x304 +#define APPLESMC_CMD_PORT 4 #define APPLESMC_NR_PORTS 32 /* 0x300-0x31f */ +#define APPLESMC_IOMEM_KEY_DATA 0 +#define APPLESMC_IOMEM_KEY_STATUS 0x4005 +#define APPLESMC_IOMEM_KEY_NAME 0x78 +#define APPLESMC_IOMEM_KEY_DATA_LEN 0x7D +#define APPLESMC_IOMEM_KEY_SMC_ID 0x7E +#define APPLESMC_IOMEM_KEY_CMD 0x7F +#define APPLESMC_IOMEM_MIN_SIZE 0x4006 + +#define APPLESMC_IOMEM_KEY_TYPE_CODE 0 +#define APPLESMC_IOMEM_KEY_TYPE_DATA_LEN 5 +#define APPLESMC_IOMEM_KEY_TYPE_FLAGS 6 + #define APPLESMC_MAX_DATA_LENGTH 32 /* Apple SMC status bits */ @@ -74,6 +87,7 @@ #define FAN_ID_FMT "F%dID" /* r-o char[16] */ #define TEMP_SENSOR_TYPE "sp78" +#define FLOAT_TYPE "flt " /* List of keys used to read/write fan speeds */ static const char *const fan_speed_fmt[] = { @@ -83,6 +97,7 @@ static const char *const fan_speed_fmt[] = { "F%dSf", /* safe speed - not all models */ "F%dTg", /* target speed (manual: rw) */ }; +#define FAN_MANUAL_FMT "F%dMd" #define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */ #define INIT_WAIT_MSECS 50 /* ... 
in 50ms increments */ @@ -119,7 +134,7 @@ struct applesmc_entry { }; /* Register lookup and registers common to all SMCs */ -static struct applesmc_registers { +struct applesmc_registers { struct mutex mutex; /* register read/write mutex */ unsigned int key_count; /* number of SMC registers */ unsigned int fan_count; /* number of fans */ @@ -133,26 +148,38 @@ static struct applesmc_registers { bool init_complete; /* true when fully initialized */ struct applesmc_entry *cache; /* cached key entries */ const char **index; /* temperature key index */ -} smcreg = { - .mutex = __MUTEX_INITIALIZER(smcreg.mutex), }; -static const int debug; -static struct platform_device *pdev; -static s16 rest_x; -static s16 rest_y; -static u8 backlight_state[2]; +struct applesmc_device { + struct acpi_device *dev; + struct device *ldev; + struct applesmc_registers reg; -static struct device *hwmon_dev; -static struct input_dev *applesmc_idev; + bool port_base_set, iomem_base_set; + u16 port_base; + u8 __iomem *iomem_base; + u32 iomem_base_addr, iomem_base_size; -/* - * Last index written to key_at_index sysfs file, and value to use for all other - * key_at_index_* sysfs files. - */ -static unsigned int key_at_index; + s16 rest_x; + s16 rest_y; + + u8 backlight_state[2]; + + struct device *hwmon_dev; + struct input_dev *idev; + + /* + * Last index written to key_at_index sysfs file, and value to use for all other + * key_at_index_* sysfs files. + */ + unsigned int key_at_index; -static struct workqueue_struct *applesmc_led_wq; + struct workqueue_struct *backlight_wq; + struct work_struct backlight_work; + struct led_classdev backlight_dev; +}; + +static const int debug; /* * Wait for specific status bits with a mask on the SMC. @@ -162,7 +189,7 @@ static struct workqueue_struct *applesmc_led_wq; * run out past 500ms. */ -static int wait_status(u8 val, u8 mask) +static int port_wait_status(struct applesmc_device *smc, u8 val, u8 mask) { u8 status; int us; @@ -170,7 +197,7 @@ static int wait_status(u8 val, u8 mask) us = APPLESMC_MIN_WAIT; for (i = 0; i < 24 ; i++) { - status = inb(APPLESMC_CMD_PORT); + status = inb(smc->port_base + APPLESMC_CMD_PORT); if ((status & mask) == val) return 0; usleep_range(us, us * 2); @@ -180,13 +207,13 @@ static int wait_status(u8 val, u8 mask) return -EIO; } -/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ +/* port_send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ -static int send_byte(u8 cmd, u16 port) +static int port_send_byte(struct applesmc_device *smc, u8 cmd, u16 port) { int status; - status = wait_status(0, SMC_STATUS_IB_CLOSED); + status = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED); if (status) return status; /* @@ -195,24 +222,25 @@ static int send_byte(u8 cmd, u16 port) * this extra read may not happen if status returns both * simultaneously and this would appear to be required. */ - status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY); + status = port_wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY); if (status) return status; - outb(cmd, port); + outb(cmd, smc->port_base + port); return 0; } -/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ +/* port_send_command - Write a command to the SMC. Callers must hold applesmc_lock.
*/ -static int send_command(u8 cmd) +static int port_send_command(struct applesmc_device *smc, u8 cmd) { int ret; - ret = wait_status(0, SMC_STATUS_IB_CLOSED); + ret = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED); if (ret) return ret; - outb(cmd, APPLESMC_CMD_PORT); + + outb(cmd, smc->port_base + APPLESMC_CMD_PORT); return 0; } @@ -222,110 +250,304 @@ static int send_command(u8 cmd) * If busy is stuck high after the command then the SMC is jammed. */ -static int smc_sane(void) +static int port_smc_sane(struct applesmc_device *smc) { int ret; - ret = wait_status(0, SMC_STATUS_BUSY); + ret = port_wait_status(smc, 0, SMC_STATUS_BUSY); if (!ret) return ret; - ret = send_command(APPLESMC_READ_CMD); + ret = port_send_command(smc, APPLESMC_READ_CMD); if (ret) return ret; - return wait_status(0, SMC_STATUS_BUSY); + return port_wait_status(smc, 0, SMC_STATUS_BUSY); } -static int send_argument(const char *key) +static int port_send_argument(struct applesmc_device *smc, const char *key) { int i; for (i = 0; i < 4; i++) - if (send_byte(key[i], APPLESMC_DATA_PORT)) + if (port_send_byte(smc, key[i], APPLESMC_DATA_PORT)) return -EIO; return 0; } -static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) +static int port_read_smc(struct applesmc_device *smc, u8 cmd, const char *key, + u8 *buffer, u8 len) { u8 status, data = 0; int i; int ret; - ret = smc_sane(); + ret = port_smc_sane(smc); if (ret) return ret; - if (send_command(cmd) || send_argument(key)) { + if (port_send_command(smc, cmd) || port_send_argument(smc, key)) { pr_warn("%.4s: read arg fail\n", key); return -EIO; } /* This has no effect on newer (2012) SMCs */ - if (send_byte(len, APPLESMC_DATA_PORT)) { + if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) { pr_warn("%.4s: read len fail\n", key); return -EIO; } for (i = 0; i < len; i++) { - if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, + if (port_wait_status(smc, + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) { pr_warn("%.4s: read data[%d] fail\n", key, i); return -EIO; } - buffer[i] = inb(APPLESMC_DATA_PORT); + buffer[i] = inb(smc->port_base + APPLESMC_DATA_PORT); } /* Read the data port until bit0 is cleared */ for (i = 0; i < 16; i++) { udelay(APPLESMC_MIN_WAIT); - status = inb(APPLESMC_CMD_PORT); + status = inb(smc->port_base + APPLESMC_CMD_PORT); if (!(status & SMC_STATUS_AWAITING_DATA)) break; - data = inb(APPLESMC_DATA_PORT); + data = inb(smc->port_base + APPLESMC_DATA_PORT); } if (i) pr_warn("flushed %d bytes, last value is: %d\n", i, data); - return wait_status(0, SMC_STATUS_BUSY); + return port_wait_status(smc, 0, SMC_STATUS_BUSY); } -static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) +static int port_write_smc(struct applesmc_device *smc, u8 cmd, const char *key, + const u8 *buffer, u8 len) { int i; int ret; - ret = smc_sane(); + ret = port_smc_sane(smc); if (ret) return ret; - if (send_command(cmd) || send_argument(key)) { + if (port_send_command(smc, cmd) || port_send_argument(smc, key)) { pr_warn("%s: write arg fail\n", key); return -EIO; } - if (send_byte(len, APPLESMC_DATA_PORT)) { + if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) { pr_warn("%.4s: write len fail\n", key); return -EIO; } for (i = 0; i < len; i++) { - if (send_byte(buffer[i], APPLESMC_DATA_PORT)) { + if (port_send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) { pr_warn("%s: write data fail\n", key); return -EIO; } } - return wait_status(0, SMC_STATUS_BUSY); + return port_wait_status(smc, 0, SMC_STATUS_BUSY); } -static int 
read_register_count(unsigned int *count) +static int port_get_smc_key_info(struct applesmc_device *smc, + const char *key, struct applesmc_entry *info) { - __be32 be; int ret; + u8 raw[6]; - ret = read_smc(APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4); + ret = port_read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, raw, 6); if (ret) return ret; + info->len = raw[0]; + memcpy(info->type, &raw[1], 4); + info->flags = raw[5]; + return 0; +} + + +/* + * MMIO based communication. + * TODO: Use updated mechanism for cmd timeout/retry + */ + +static void iomem_clear_status(struct applesmc_device *smc) +{ + if (ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS)) + iowrite8(0, smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); +} + +static int iomem_wait_read(struct applesmc_device *smc) +{ + u8 status; + int us; + int i; + + us = APPLESMC_MIN_WAIT; + for (i = 0; i < 24 ; i++) { + status = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); + if (status & 0x20) + return 0; + usleep_range(us, us * 2); + if (i > 9) + us <<= 1; + } + + dev_warn(smc->ldev, "%s... timeout\n", __func__); + return -EIO; +} + +static int iomem_read_smc(struct applesmc_device *smc, u8 cmd, const char *key, + u8 *buffer, u8 len) +{ + u8 err, remote_len; + u32 key_int = *((u32 *) key); + + iomem_clear_status(smc); + iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); + iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); + iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); + + if (iomem_wait_read(smc)) + return -EIO; + + err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); + if (err != 0) { + dev_warn(smc->ldev, "read_smc_mmio(%x %8x/%.4s) failed: %u\n", + cmd, key_int, key, err); + return -EIO; + } + + if (cmd == APPLESMC_READ_CMD) { + remote_len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN); + if (remote_len != len) { + dev_warn(smc->ldev, + "read_smc_mmio(%x %8x/%.4s) failed: buffer length mismatch (remote = %u, requested = %u)\n", + cmd, key_int, key, remote_len, len); + return -EINVAL; + } + } else { + remote_len = len; + } + + memcpy_fromio(buffer, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, + remote_len); + + dev_dbg(smc->ldev, "read_smc_mmio(%x %8x/%.4s): buflen=%u reslen=%u\n", + cmd, key_int, key, len, remote_len); + print_hex_dump_bytes("read_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, remote_len); + return 0; +} + +static int iomem_get_smc_key_type(struct applesmc_device *smc, const char *key, + struct applesmc_entry *e) +{ + u8 err; + u8 cmd = APPLESMC_GET_KEY_TYPE_CMD; + u32 key_int = *((u32 *) key); + + iomem_clear_status(smc); + iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); + iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); + iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); + + if (iomem_wait_read(smc)) + return -EIO; + + err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); + if (err != 0) { + dev_warn(smc->ldev, "get_smc_key_type_mmio(%.4s) failed: %u\n", key, err); + return -EIO; + } + + e->len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_DATA_LEN); + *((uint32_t *) e->type) = ioread32( + smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_CODE); + e->flags = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_FLAGS); + + dev_dbg(smc->ldev, "get_smc_key_type_mmio(%.4s): len=%u type=%.4s flags=%x\n", + key, e->len, e->type, e->flags); + return 0; +} + +static int iomem_write_smc(struct applesmc_device *smc, u8 cmd, const char *key, + const u8 *buffer, u8 len) +{ + u8 err; + u32 key_int = *((u32 *) key); + + iomem_clear_status(smc); + 
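/*
+ * Write sequence: latch the key name, copy the payload and set its
+ * length, zero the SMC id, then write the command register last; the
+ * driver then polls the status register for completion.
+ */
+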
iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); + memcpy_toio(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, buffer, len); + iowrite32(len, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN); + iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); + iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); + + if (iomem_wait_read(smc)) + return -EIO; + + err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); + if (err != 0) { + dev_warn(smc->ldev, "write_smc_mmio(%x %.4s) failed: %u\n", cmd, key, err); + print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len); + return -EIO; + } + + dev_dbg(smc->ldev, "write_smc_mmio(%x %.4s): buflen=%u\n", cmd, key, len); + print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len); + return 0; +} + + +static int read_smc(struct applesmc_device *smc, const char *key, + u8 *buffer, u8 len) +{ + if (smc->iomem_base_set) + return iomem_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); + else + return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); +} + +static int write_smc(struct applesmc_device *smc, const char *key, + const u8 *buffer, u8 len) +{ + if (smc->iomem_base_set) + return iomem_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); + else + return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); +} + +static int get_smc_key_by_index(struct applesmc_device *smc, + unsigned int index, char *key) +{ + __be32 be; + + be = cpu_to_be32(index); + if (smc->iomem_base_set) + return iomem_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, + (const char *) &be, (u8 *) key, 4); + else + return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, + (const char *) &be, (u8 *) key, 4); +} + +static int get_smc_key_info(struct applesmc_device *smc, const char *key, + struct applesmc_entry *info) +{ + if (smc->iomem_base_set) + return iomem_get_smc_key_type(smc, key, info); + else + return port_get_smc_key_info(smc, key, info); +} + +static int read_register_count(struct applesmc_device *smc, + unsigned int *count) +{ + __be32 be; + int ret; + + ret = read_smc(smc, KEY_COUNT_KEY, (u8 *)&be, 4); + if (ret < 0) + return ret; *count = be32_to_cpu(be); return 0; @@ -338,76 +560,73 @@ static int read_register_count(unsigned int *count) * All functions below are concurrency safe - callers should NOT hold lock. 
*/ -static int applesmc_read_entry(const struct applesmc_entry *entry, - u8 *buf, u8 len) +static int applesmc_read_entry(struct applesmc_device *smc, + const struct applesmc_entry *entry, u8 *buf, u8 len) { int ret; if (entry->len != len) return -EINVAL; - mutex_lock(&smcreg.mutex); - ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len); - mutex_unlock(&smcreg.mutex); + mutex_lock(&smc->reg.mutex); + ret = read_smc(smc, entry->key, buf, len); + mutex_unlock(&smc->reg.mutex); return ret; } -static int applesmc_write_entry(const struct applesmc_entry *entry, - const u8 *buf, u8 len) +static int applesmc_write_entry(struct applesmc_device *smc, + const struct applesmc_entry *entry, const u8 *buf, u8 len) { int ret; if (entry->len != len) return -EINVAL; - mutex_lock(&smcreg.mutex); - ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len); - mutex_unlock(&smcreg.mutex); + mutex_lock(&smc->reg.mutex); + ret = write_smc(smc, entry->key, buf, len); + mutex_unlock(&smc->reg.mutex); return ret; } -static const struct applesmc_entry *applesmc_get_entry_by_index(int index) +static const struct applesmc_entry *applesmc_get_entry_by_index( + struct applesmc_device *smc, int index) { - struct applesmc_entry *cache = &smcreg.cache[index]; - u8 key[4], info[6]; - __be32 be; + struct applesmc_entry *cache = &smc->reg.cache[index]; + char key[4]; int ret = 0; if (cache->valid) return cache; - mutex_lock(&smcreg.mutex); + mutex_lock(&smc->reg.mutex); if (cache->valid) goto out; - be = cpu_to_be32(index); - ret = read_smc(APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4); + ret = get_smc_key_by_index(smc, index, key); if (ret) goto out; - ret = read_smc(APPLESMC_GET_KEY_TYPE_CMD, key, info, 6); + memcpy(cache->key, key, 4); + + ret = get_smc_key_info(smc, key, cache); if (ret) goto out; - - memcpy(cache->key, key, 4); - cache->len = info[0]; - memcpy(cache->type, &info[1], 4); - cache->flags = info[5]; cache->valid = true; out: - mutex_unlock(&smcreg.mutex); + mutex_unlock(&smc->reg.mutex); if (ret) return ERR_PTR(ret); return cache; } -static int applesmc_get_lower_bound(unsigned int *lo, const char *key) +static int applesmc_get_lower_bound(struct applesmc_device *smc, + unsigned int *lo, const char *key) { - int begin = 0, end = smcreg.key_count; + int begin = 0, end = smc->reg.key_count; const struct applesmc_entry *entry; while (begin != end) { int middle = begin + (end - begin) / 2; - entry = applesmc_get_entry_by_index(middle); + entry = applesmc_get_entry_by_index(smc, middle); if (IS_ERR(entry)) { *lo = 0; return PTR_ERR(entry); @@ -422,16 +641,17 @@ static int applesmc_get_lower_bound(unsigned int *lo, const char *key) return 0; } -static int applesmc_get_upper_bound(unsigned int *hi, const char *key) +static int applesmc_get_upper_bound(struct applesmc_device *smc, + unsigned int *hi, const char *key) { - int begin = 0, end = smcreg.key_count; + int begin = 0, end = smc->reg.key_count; const struct applesmc_entry *entry; while (begin != end) { int middle = begin + (end - begin) / 2; - entry = applesmc_get_entry_by_index(middle); + entry = applesmc_get_entry_by_index(smc, middle); if (IS_ERR(entry)) { - *hi = smcreg.key_count; + *hi = smc->reg.key_count; return PTR_ERR(entry); } if (strcmp(key, entry->key) < 0) @@ -444,50 +664,54 @@ static int applesmc_get_upper_bound(unsigned int *hi, const char *key) return 0; } -static const struct applesmc_entry *applesmc_get_entry_by_key(const char *key) +static const struct applesmc_entry *applesmc_get_entry_by_key( + struct applesmc_device *smc, const 
char *key) { int begin, end; int ret; - ret = applesmc_get_lower_bound(&begin, key); + ret = applesmc_get_lower_bound(smc, &begin, key); if (ret) return ERR_PTR(ret); - ret = applesmc_get_upper_bound(&end, key); + ret = applesmc_get_upper_bound(smc, &end, key); if (ret) return ERR_PTR(ret); if (end - begin != 1) return ERR_PTR(-EINVAL); - return applesmc_get_entry_by_index(begin); + return applesmc_get_entry_by_index(smc, begin); } -static int applesmc_read_key(const char *key, u8 *buffer, u8 len) +static int applesmc_read_key(struct applesmc_device *smc, + const char *key, u8 *buffer, u8 len) { const struct applesmc_entry *entry; - entry = applesmc_get_entry_by_key(key); + entry = applesmc_get_entry_by_key(smc, key); if (IS_ERR(entry)) return PTR_ERR(entry); - return applesmc_read_entry(entry, buffer, len); + return applesmc_read_entry(smc, entry, buffer, len); } -static int applesmc_write_key(const char *key, const u8 *buffer, u8 len) +static int applesmc_write_key(struct applesmc_device *smc, + const char *key, const u8 *buffer, u8 len) { const struct applesmc_entry *entry; - entry = applesmc_get_entry_by_key(key); + entry = applesmc_get_entry_by_key(smc, key); if (IS_ERR(entry)) return PTR_ERR(entry); - return applesmc_write_entry(entry, buffer, len); + return applesmc_write_entry(smc, entry, buffer, len); } -static int applesmc_has_key(const char *key, bool *value) +static int applesmc_has_key(struct applesmc_device *smc, + const char *key, bool *value) { const struct applesmc_entry *entry; - entry = applesmc_get_entry_by_key(key); + entry = applesmc_get_entry_by_key(smc, key); if (IS_ERR(entry) && PTR_ERR(entry) != -EINVAL) return PTR_ERR(entry); @@ -498,12 +722,13 @@ static int applesmc_has_key(const char *key, bool *value) /* * applesmc_read_s16 - Read 16-bit signed big endian register */ -static int applesmc_read_s16(const char *key, s16 *value) +static int applesmc_read_s16(struct applesmc_device *smc, + const char *key, s16 *value) { u8 buffer[2]; int ret; - ret = applesmc_read_key(key, buffer, 2); + ret = applesmc_read_key(smc, key, buffer, 2); if (ret) return ret; @@ -511,31 +736,68 @@ static int applesmc_read_s16(const char *key, s16 *value) return 0; } +/** + * applesmc_float_to_u32 - Retrieve the integral part of a float. + * This is needed because Apple made fans use float values in the T2. + * The fractional part is not significant for these readings, so only + * the integral part is extracted. + */ +static inline u32 applesmc_float_to_u32(u32 d) +{ + u8 sign = (u8) ((d >> 31) & 1); + s32 exp = (s32) ((d >> 23) & 0xff) - 0x7f; + u32 fr = d & ((1u << 23) - 1); + + if (sign || exp < 0) + return 0; + + return (u32) ((1u << exp) + (fr >> (23 - exp))); +} + +/** + * applesmc_u32_to_float - Convert a u32 into a float. + * See applesmc_float_to_u32 for a rationale. + */ +static inline u32 applesmc_u32_to_float(u32 d) +{ + u32 dc = d, bc = 0, exp; + + if (!d) + return 0; + + while (dc >>= 1) + ++bc; + exp = 0x7f + bc; + + return (u32) ((exp << 23) | + ((d << (23 - (exp - 0x7f))) & ((1u << 23) - 1))); } /* * applesmc_device_init - initialize the accelerometer. Can sleep.
*/ -static void applesmc_device_init(void) +static void applesmc_device_init(struct applesmc_device *smc) { int total; u8 buffer[2]; - if (!smcreg.has_accelerometer) + if (!smc->reg.has_accelerometer) return; for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) { - if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) && + if (!applesmc_read_key(smc, MOTION_SENSOR_KEY, buffer, 2) && (buffer[0] != 0x00 || buffer[1] != 0x00)) return; buffer[0] = 0xe0; buffer[1] = 0x00; - applesmc_write_key(MOTION_SENSOR_KEY, buffer, 2); + applesmc_write_key(smc, MOTION_SENSOR_KEY, buffer, 2); msleep(INIT_WAIT_MSECS); } pr_warn("failed to init the device\n"); } -static int applesmc_init_index(struct applesmc_registers *s) +static int applesmc_init_index(struct applesmc_device *smc, + struct applesmc_registers *s) { const struct applesmc_entry *entry; unsigned int i; @@ -548,7 +810,7 @@ static int applesmc_init_index(struct applesmc_registers *s) return -ENOMEM; for (i = s->temp_begin; i < s->temp_end; i++) { - entry = applesmc_get_entry_by_index(i); + entry = applesmc_get_entry_by_index(smc, i); if (IS_ERR(entry)) continue; if (strcmp(entry->type, TEMP_SENSOR_TYPE)) @@ -562,9 +824,9 @@ static int applesmc_init_index(struct applesmc_registers *s) /* * applesmc_init_smcreg_try - Try to initialize register cache. Idempotent. */ -static int applesmc_init_smcreg_try(void) +static int applesmc_init_smcreg_try(struct applesmc_device *smc) { - struct applesmc_registers *s = &smcreg; + struct applesmc_registers *s = &smc->reg; bool left_light_sensor = false, right_light_sensor = false; unsigned int count; u8 tmp[1]; @@ -573,7 +835,7 @@ static int applesmc_init_smcreg_try(void) if (s->init_complete) return 0; - ret = read_register_count(&count); + ret = read_register_count(smc, &count); if (ret) return ret; @@ -590,35 +852,35 @@ static int applesmc_init_smcreg_try(void) if (!s->cache) return -ENOMEM; - ret = applesmc_read_key(FANS_COUNT, tmp, 1); + ret = applesmc_read_key(smc, FANS_COUNT, tmp, 1); if (ret) return ret; s->fan_count = tmp[0]; if (s->fan_count > 10) s->fan_count = 10; - ret = applesmc_get_lower_bound(&s->temp_begin, "T"); + ret = applesmc_get_lower_bound(smc, &s->temp_begin, "T"); if (ret) return ret; - ret = applesmc_get_lower_bound(&s->temp_end, "U"); + ret = applesmc_get_lower_bound(smc, &s->temp_end, "U"); if (ret) return ret; s->temp_count = s->temp_end - s->temp_begin; - ret = applesmc_init_index(s); + ret = applesmc_init_index(smc, s); if (ret) return ret; - ret = applesmc_has_key(LIGHT_SENSOR_LEFT_KEY, &left_light_sensor); + ret = applesmc_has_key(smc, LIGHT_SENSOR_LEFT_KEY, &left_light_sensor); if (ret) return ret; - ret = applesmc_has_key(LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor); + ret = applesmc_has_key(smc, LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor); if (ret) return ret; - ret = applesmc_has_key(MOTION_SENSOR_KEY, &s->has_accelerometer); + ret = applesmc_has_key(smc, MOTION_SENSOR_KEY, &s->has_accelerometer); if (ret) return ret; - ret = applesmc_has_key(BACKLIGHT_KEY, &s->has_key_backlight); + ret = applesmc_has_key(smc, BACKLIGHT_KEY, &s->has_key_backlight); if (ret) return ret; @@ -634,13 +896,13 @@ static int applesmc_init_smcreg_try(void) return 0; } -static void applesmc_destroy_smcreg(void) +static void applesmc_destroy_smcreg(struct applesmc_device *smc) { - kfree(smcreg.index); - smcreg.index = NULL; - kfree(smcreg.cache); - smcreg.cache = NULL; - smcreg.init_complete = false; + kfree(smc->reg.index); + smc->reg.index = NULL; + kfree(smc->reg.cache); + 
smc->reg.cache = NULL; + smc->reg.init_complete = false; } /* @@ -649,12 +911,12 @@ static void applesmc_destroy_smcreg(void) * Retries until initialization is successful, or the operation times out. * */ -static int applesmc_init_smcreg(void) +static int applesmc_init_smcreg(struct applesmc_device *smc) { int ms, ret; for (ms = 0; ms < INIT_TIMEOUT_MSECS; ms += INIT_WAIT_MSECS) { - ret = applesmc_init_smcreg_try(); + ret = applesmc_init_smcreg_try(smc); if (!ret) { if (ms) pr_info("init_smcreg() took %d ms\n", ms); @@ -663,50 +925,223 @@ static int applesmc_init_smcreg(void) msleep(INIT_WAIT_MSECS); } - applesmc_destroy_smcreg(); + applesmc_destroy_smcreg(smc); return ret; } /* Device model stuff */ -static int applesmc_probe(struct platform_device *dev) + +static int applesmc_init_resources(struct applesmc_device *smc); +static void applesmc_free_resources(struct applesmc_device *smc); +static int applesmc_create_modules(struct applesmc_device *smc); +static void applesmc_destroy_modules(struct applesmc_device *smc); + +static int applesmc_add(struct acpi_device *dev) { + struct applesmc_device *smc; int ret; - ret = applesmc_init_smcreg(); + smc = kzalloc(sizeof(struct applesmc_device), GFP_KERNEL); + if (!smc) + return -ENOMEM; + smc->dev = dev; + smc->ldev = &dev->dev; + mutex_init(&smc->reg.mutex); + + dev_set_drvdata(&dev->dev, smc); + + ret = applesmc_init_resources(smc); if (ret) - return ret; + goto out_mem; + + ret = applesmc_init_smcreg(smc); + if (ret) + goto out_res; + + applesmc_device_init(smc); + + ret = applesmc_create_modules(smc); + if (ret) + goto out_reg; + + return 0; + +out_reg: + applesmc_destroy_smcreg(smc); +out_res: + applesmc_free_resources(smc); +out_mem: + dev_set_drvdata(&dev->dev, NULL); + mutex_destroy(&smc->reg.mutex); + kfree(smc); + + return ret; +} + +static void applesmc_remove(struct acpi_device *dev) +{ + struct applesmc_device *smc = dev_get_drvdata(&dev->dev); + + applesmc_destroy_modules(smc); + applesmc_destroy_smcreg(smc); + applesmc_free_resources(smc); - applesmc_device_init(); + mutex_destroy(&smc->reg.mutex); + kfree(smc); + + return; +} + +static acpi_status applesmc_walk_resources(struct acpi_resource *res, + void *data) +{ + struct applesmc_device *smc = data; + + switch (res->type) { + case ACPI_RESOURCE_TYPE_IO: + if (!smc->port_base_set) { + if (res->data.io.address_length < APPLESMC_NR_PORTS) + return AE_ERROR; + smc->port_base = res->data.io.minimum; + smc->port_base_set = true; + } + return AE_OK; + + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + if (!smc->iomem_base_set) { + if (res->data.fixed_memory32.address_length < + APPLESMC_IOMEM_MIN_SIZE) { + dev_warn(smc->ldev, "found iomem but it's too small: %u\n", + res->data.fixed_memory32.address_length); + return AE_OK; + } + smc->iomem_base_addr = res->data.fixed_memory32.address; + smc->iomem_base_size = res->data.fixed_memory32.address_length; + smc->iomem_base_set = true; + } + return AE_OK; + + case ACPI_RESOURCE_TYPE_END_TAG: + if (smc->port_base_set) + return AE_OK; + else + return AE_NOT_FOUND; + + default: + return AE_OK; + } +} + +static int applesmc_try_enable_iomem(struct applesmc_device *smc); + +static int applesmc_init_resources(struct applesmc_device *smc) +{ + int ret; + + ret = acpi_walk_resources(smc->dev->handle, METHOD_NAME__CRS, + applesmc_walk_resources, smc); + if (ACPI_FAILURE(ret)) + return -ENXIO; + + if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc")) + return -ENXIO; + + if (smc->iomem_base_set) { + if (applesmc_try_enable_iomem(smc)) + 
smc->iomem_base_set = false; + } + + return 0; +} + +static int applesmc_try_enable_iomem(struct applesmc_device *smc) +{ + u8 test_val, ldkn_version; + + dev_dbg(smc->ldev, "Trying to enable iomem based communication\n"); + smc->iomem_base = ioremap(smc->iomem_base_addr, smc->iomem_base_size); + if (!smc->iomem_base) + goto out; + + /* Apple's driver does this check for some reason */ + test_val = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); + if (test_val == 0xff) { + dev_warn(smc->ldev, + "iomem enable failed: initial status is 0xff (is %x)\n", + test_val); + goto out_iomem; + } + + if (read_smc(smc, "LDKN", &ldkn_version, 1)) { + dev_warn(smc->ldev, "iomem enable failed: ldkn read failed\n"); + goto out_iomem; + } + + if (ldkn_version < 2) { + dev_warn(smc->ldev, + "iomem enable failed: ldkn version %u is less than minimum (2)\n", + ldkn_version); + goto out_iomem; + } return 0; + +out_iomem: + iounmap(smc->iomem_base); + +out: + return -ENXIO; +} + +static void applesmc_free_resources(struct applesmc_device *smc) +{ + if (smc->iomem_base_set) + iounmap(smc->iomem_base); + release_region(smc->port_base, APPLESMC_NR_PORTS); } /* Synchronize device with memorized backlight state */ static int applesmc_pm_resume(struct device *dev) { - if (smcreg.has_key_backlight) - applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2); + struct applesmc_device *smc = dev_get_drvdata(dev); + + if (smc->reg.has_key_backlight) + applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2); + return 0; } /* Reinitialize device on resume from hibernation */ static int applesmc_pm_restore(struct device *dev) { - applesmc_device_init(); + struct applesmc_device *smc = dev_get_drvdata(dev); + + applesmc_device_init(smc); + return applesmc_pm_resume(dev); } +static const struct acpi_device_id applesmc_ids[] = { + {"APP0001", 0}, + {"", 0}, +}; + static const struct dev_pm_ops applesmc_pm_ops = { .resume = applesmc_pm_resume, .restore = applesmc_pm_restore, }; -static struct platform_driver applesmc_driver = { - .probe = applesmc_probe, - .driver = { - .name = "applesmc", - .pm = &applesmc_pm_ops, +static struct acpi_driver applesmc_driver = { + .name = "applesmc", + .class = "applesmc", + .ids = applesmc_ids, + .ops = { + .add = applesmc_add, + .remove = applesmc_remove + }, + .drv = { + .pm = &applesmc_pm_ops }, }; @@ -714,25 +1149,26 @@ static struct platform_driver applesmc_driver = { * applesmc_calibrate - Set our "resting" values. Callers must * hold applesmc_lock. 
*/ -static void applesmc_calibrate(void) +static void applesmc_calibrate(struct applesmc_device *smc) { - applesmc_read_s16(MOTION_SENSOR_X_KEY, &rest_x); - applesmc_read_s16(MOTION_SENSOR_Y_KEY, &rest_y); - rest_x = -rest_x; + applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &smc->rest_x); + applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &smc->rest_y); + smc->rest_x = -smc->rest_x; } static void applesmc_idev_poll(struct input_dev *idev) { + struct applesmc_device *smc = dev_get_drvdata(&idev->dev); s16 x, y; - if (applesmc_read_s16(MOTION_SENSOR_X_KEY, &x)) + if (applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x)) return; - if (applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y)) + if (applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y)) return; x = -x; - input_report_abs(idev, ABS_X, x - rest_x); - input_report_abs(idev, ABS_Y, y - rest_y); + input_report_abs(idev, ABS_X, x - smc->rest_x); + input_report_abs(idev, ABS_Y, y - smc->rest_y); input_sync(idev); } @@ -747,16 +1183,17 @@ static ssize_t applesmc_name_show(struct device *dev, static ssize_t applesmc_position_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct applesmc_device *smc = dev_get_drvdata(dev); int ret; s16 x, y, z; - ret = applesmc_read_s16(MOTION_SENSOR_X_KEY, &x); + ret = applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x); if (ret) goto out; - ret = applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y); + ret = applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y); if (ret) goto out; - ret = applesmc_read_s16(MOTION_SENSOR_Z_KEY, &z); + ret = applesmc_read_s16(smc, MOTION_SENSOR_Z_KEY, &z); if (ret) goto out; @@ -770,6 +1207,7 @@ static ssize_t applesmc_position_show(struct device *dev, static ssize_t applesmc_light_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); const struct applesmc_entry *entry; static int data_length; int ret; @@ -777,7 +1215,7 @@ static ssize_t applesmc_light_show(struct device *dev, u8 buffer[10]; if (!data_length) { - entry = applesmc_get_entry_by_key(LIGHT_SENSOR_LEFT_KEY); + entry = applesmc_get_entry_by_key(smc, LIGHT_SENSOR_LEFT_KEY); if (IS_ERR(entry)) return PTR_ERR(entry); if (entry->len > 10) @@ -786,7 +1224,7 @@ static ssize_t applesmc_light_show(struct device *dev, pr_info("light sensor data length set to %d\n", data_length); } - ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length); + ret = applesmc_read_key(smc, LIGHT_SENSOR_LEFT_KEY, buffer, data_length); if (ret) goto out; /* newer macbooks report a single 10-bit bigendian value */ @@ -796,7 +1234,7 @@ static ssize_t applesmc_light_show(struct device *dev, } left = buffer[2]; - ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length); + ret = applesmc_read_key(smc, LIGHT_SENSOR_RIGHT_KEY, buffer, data_length); if (ret) goto out; right = buffer[2]; @@ -812,7 +1250,8 @@ static ssize_t applesmc_light_show(struct device *dev, static ssize_t applesmc_show_sensor_label(struct device *dev, struct device_attribute *devattr, char *sysfsbuf) { - const char *key = smcreg.index[to_index(devattr)]; + struct applesmc_device *smc = dev_get_drvdata(dev); + const char *key = smc->reg.index[to_index(devattr)]; return sysfs_emit(sysfsbuf, "%s\n", key); } @@ -821,12 +1260,13 @@ static ssize_t applesmc_show_sensor_label(struct device *dev, static ssize_t applesmc_show_temperature(struct device *dev, struct device_attribute *devattr, char *sysfsbuf) { - const char *key = smcreg.index[to_index(devattr)]; + struct applesmc_device *smc = dev_get_drvdata(dev); + const 
char *key = smc->reg.index[to_index(devattr)]; int ret; s16 value; int temp; - ret = applesmc_read_s16(key, &value); + ret = applesmc_read_s16(smc, key, &value); if (ret) return ret; @@ -838,6 +1278,8 @@ static ssize_t applesmc_show_temperature(struct device *dev, static ssize_t applesmc_show_fan_speed(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; int ret; unsigned int speed = 0; char newkey[5]; @@ -846,11 +1288,21 @@ static ssize_t applesmc_show_fan_speed(struct device *dev, scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], to_index(attr)); - ret = applesmc_read_key(newkey, buffer, 2); + entry = applesmc_get_entry_by_key(smc, newkey); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + if (!strcmp(entry->type, FLOAT_TYPE)) { + ret = applesmc_read_entry(smc, entry, (u8 *) &speed, 4); + speed = applesmc_float_to_u32(speed); + } else { + ret = applesmc_read_entry(smc, entry, buffer, 2); + speed = ((buffer[0] << 8 | buffer[1]) >> 2); + } + if (ret) return ret; - speed = ((buffer[0] << 8 | buffer[1]) >> 2); return sysfs_emit(sysfsbuf, "%u\n", speed); } @@ -858,6 +1310,8 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, struct device_attribute *attr, const char *sysfsbuf, size_t count) { + struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; int ret; unsigned long speed; char newkey[5]; @@ -869,9 +1323,18 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], to_index(attr)); - buffer[0] = (speed >> 6) & 0xff; - buffer[1] = (speed << 2) & 0xff; - ret = applesmc_write_key(newkey, buffer, 2); + entry = applesmc_get_entry_by_key(smc, newkey); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + if (!strcmp(entry->type, FLOAT_TYPE)) { + speed = applesmc_u32_to_float(speed); + ret = applesmc_write_entry(smc, entry, (u8 *) &speed, 4); + } else { + buffer[0] = (speed >> 6) & 0xff; + buffer[1] = (speed << 2) & 0xff; + ret = applesmc_write_key(smc, newkey, buffer, 2); + } if (ret) return ret; @@ -882,15 +1345,30 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, static ssize_t applesmc_show_fan_manual(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); int ret; u16 manual = 0; u8 buffer[2]; + char newkey[5]; + bool has_newkey = false; + + scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr)); + + ret = applesmc_has_key(smc, newkey, &has_newkey); + if (ret) + return ret; + + if (has_newkey) { + ret = applesmc_read_key(smc, newkey, buffer, 1); + manual = buffer[0]; + } else { + ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); + manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01; + } - ret = applesmc_read_key(FANS_MANUAL, buffer, 2); if (ret) return ret; - manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01; return sysfs_emit(sysfsbuf, "%d\n", manual); } @@ -898,29 +1376,42 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, struct device_attribute *attr, const char *sysfsbuf, size_t count) { + struct applesmc_device *smc = dev_get_drvdata(dev); int ret; u8 buffer[2]; + char newkey[5]; + bool has_newkey = false; unsigned long input; u16 val; if (kstrtoul(sysfsbuf, 10, &input) < 0) return -EINVAL; - ret = applesmc_read_key(FANS_MANUAL, buffer, 2); + scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr)); + + ret = 
applesmc_has_key(smc, newkey, &has_newkey); if (ret) - goto out; + return ret; - val = (buffer[0] << 8 | buffer[1]); + if (has_newkey) { + buffer[0] = input & 1; + ret = applesmc_write_key(smc, newkey, buffer, 1); + } else { + ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); + if (ret) + goto out; + val = (buffer[0] << 8 | buffer[1]); - if (input) - val = val | (0x01 << to_index(attr)); - else - val = val & ~(0x01 << to_index(attr)); + if (input) + val = val | (0x01 << to_index(attr)); + else + val = val & ~(0x01 << to_index(attr)); - buffer[0] = (val >> 8) & 0xFF; - buffer[1] = val & 0xFF; + buffer[0] = (val >> 8) & 0xFF; + buffer[1] = val & 0xFF; - ret = applesmc_write_key(FANS_MANUAL, buffer, 2); + ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2); + } out: if (ret) @@ -932,13 +1423,14 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, static ssize_t applesmc_show_fan_position(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); int ret; char newkey[5]; u8 buffer[17]; scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr)); - ret = applesmc_read_key(newkey, buffer, 16); + ret = applesmc_read_key(smc, newkey, buffer, 16); buffer[16] = 0; if (ret) @@ -950,43 +1442,79 @@ static ssize_t applesmc_show_fan_position(struct device *dev, static ssize_t applesmc_calibrate_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { - return sysfs_emit(sysfsbuf, "(%d,%d)\n", rest_x, rest_y); + struct applesmc_device *smc = dev_get_drvdata(dev); + + return sysfs_emit(sysfsbuf, "(%d,%d)\n", smc->rest_x, smc->rest_y); } static ssize_t applesmc_calibrate_store(struct device *dev, struct device_attribute *attr, const char *sysfsbuf, size_t count) { - applesmc_calibrate(); + struct applesmc_device *smc = dev_get_drvdata(dev); + + applesmc_calibrate(smc); return count; } static void applesmc_backlight_set(struct work_struct *work) { - applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2); + struct applesmc_device *smc = container_of(work, struct applesmc_device, backlight_work); + + applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2); } -static DECLARE_WORK(backlight_work, &applesmc_backlight_set); static void applesmc_brightness_set(struct led_classdev *led_cdev, enum led_brightness value) { + struct applesmc_device *smc = dev_get_drvdata(led_cdev->dev); int ret; - backlight_state[0] = value; - ret = queue_work(applesmc_led_wq, &backlight_work); + smc->backlight_state[0] = value; + ret = queue_work(smc->backlight_wq, &smc->backlight_work); if (debug && (!ret)) dev_dbg(led_cdev->dev, "work was already on the queue.\n"); } +static ssize_t applesmc_BCLM_store(struct device *dev, + struct device_attribute *attr, char *sysfsbuf, size_t count) +{ + struct applesmc_device *smc = dev_get_drvdata(dev); + u8 val; + + if (kstrtou8(sysfsbuf, 10, &val) < 0) + return -EINVAL; + + if (val > 100) + return -EINVAL; + + if (applesmc_write_key(smc, "BCLM", &val, 1)) + return -ENODEV; + return count; +} + +static ssize_t applesmc_BCLM_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) +{ + struct applesmc_device *smc = dev_get_drvdata(dev); + u8 val; + + if (applesmc_read_key(smc, "BCLM", &val, 1)) + return -ENODEV; + + return sysfs_emit(sysfsbuf, "%d\n", val); +} + static ssize_t applesmc_key_count_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); int ret; u8 buffer[4]; u32 count; - ret =
applesmc_read_key(KEY_COUNT_KEY, buffer, 4); + ret = applesmc_read_key(smc, KEY_COUNT_KEY, buffer, 4); if (ret) return ret; @@ -998,13 +1526,14 @@ static ssize_t applesmc_key_count_show(struct device *dev, static ssize_t applesmc_key_at_index_read_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); const struct applesmc_entry *entry; int ret; - entry = applesmc_get_entry_by_index(key_at_index); + entry = applesmc_get_entry_by_index(smc, smc->key_at_index); if (IS_ERR(entry)) return PTR_ERR(entry); - ret = applesmc_read_entry(entry, sysfsbuf, entry->len); + ret = applesmc_read_entry(smc, entry, sysfsbuf, entry->len); if (ret) return ret; @@ -1014,9 +1543,10 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev, static ssize_t applesmc_key_at_index_data_length_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); const struct applesmc_entry *entry; - entry = applesmc_get_entry_by_index(key_at_index); + entry = applesmc_get_entry_by_index(smc, smc->key_at_index); if (IS_ERR(entry)) return PTR_ERR(entry); @@ -1026,9 +1556,10 @@ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev, static ssize_t applesmc_key_at_index_type_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); const struct applesmc_entry *entry; - entry = applesmc_get_entry_by_index(key_at_index); + entry = applesmc_get_entry_by_index(smc, smc->key_at_index); if (IS_ERR(entry)) return PTR_ERR(entry); @@ -1038,9 +1569,10 @@ static ssize_t applesmc_key_at_index_type_show(struct device *dev, static ssize_t applesmc_key_at_index_name_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { + struct applesmc_device *smc = dev_get_drvdata(dev); const struct applesmc_entry *entry; - entry = applesmc_get_entry_by_index(key_at_index); + entry = applesmc_get_entry_by_index(smc, smc->key_at_index); if (IS_ERR(entry)) return PTR_ERR(entry); @@ -1050,28 +1582,25 @@ static ssize_t applesmc_key_at_index_name_show(struct device *dev, static ssize_t applesmc_key_at_index_show(struct device *dev, struct device_attribute *attr, char *sysfsbuf) { - return sysfs_emit(sysfsbuf, "%d\n", key_at_index); + struct applesmc_device *smc = dev_get_drvdata(dev); + + return sysfs_emit(sysfsbuf, "%d\n", smc->key_at_index); } static ssize_t applesmc_key_at_index_store(struct device *dev, struct device_attribute *attr, const char *sysfsbuf, size_t count) { + struct applesmc_device *smc = dev_get_drvdata(dev); unsigned long newkey; if (kstrtoul(sysfsbuf, 10, &newkey) < 0 - || newkey >= smcreg.key_count) + || newkey >= smc->reg.key_count) return -EINVAL; - key_at_index = newkey; + smc->key_at_index = newkey; return count; } -static struct led_classdev applesmc_backlight = { - .name = "smc::kbd_backlight", - .default_trigger = "nand-disk", - .brightness_set = applesmc_brightness_set, -}; - static struct applesmc_node_group info_group[] = { { "name", applesmc_name_show }, { "key_count", applesmc_key_count_show }, @@ -1111,19 +1640,25 @@ static struct applesmc_node_group temp_group[] = { { } }; +static struct applesmc_node_group BCLM_group[] = { + { "battery_charge_limit", applesmc_BCLM_show, applesmc_BCLM_store }, + { } +}; + /* Module stuff */ /* * applesmc_destroy_nodes - remove files and free associated memory */ -static void applesmc_destroy_nodes(struct applesmc_node_group *groups) +static 
void applesmc_destroy_nodes(struct applesmc_device *smc, + struct applesmc_node_group *groups) { struct applesmc_node_group *grp; struct applesmc_dev_attr *node; for (grp = groups; grp->nodes; grp++) { for (node = grp->nodes; node->sda.dev_attr.attr.name; node++) - sysfs_remove_file(&pdev->dev.kobj, + sysfs_remove_file(&smc->dev->dev.kobj, &node->sda.dev_attr.attr); kfree(grp->nodes); grp->nodes = NULL; @@ -1133,7 +1668,8 @@ static void applesmc_destroy_nodes(struct applesmc_node_group *groups) /* * applesmc_create_nodes - create a two-dimensional group of sysfs files */ -static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) +static int applesmc_create_nodes(struct applesmc_device *smc, + struct applesmc_node_group *groups, int num) { struct applesmc_node_group *grp; struct applesmc_dev_attr *node; @@ -1157,7 +1693,7 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) sysfs_attr_init(attr); attr->name = node->name; attr->mode = 0444 | (grp->store ? 0200 : 0); - ret = sysfs_create_file(&pdev->dev.kobj, attr); + ret = sysfs_create_file(&smc->dev->dev.kobj, attr); if (ret) { attr->name = NULL; goto out; @@ -1167,57 +1703,56 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) return 0; out: - applesmc_destroy_nodes(groups); + applesmc_destroy_nodes(smc, groups); return ret; } /* Create accelerometer resources */ -static int applesmc_create_accelerometer(void) +static int applesmc_create_accelerometer(struct applesmc_device *smc) { int ret; - - if (!smcreg.has_accelerometer) + if (!smc->reg.has_accelerometer) return 0; - ret = applesmc_create_nodes(accelerometer_group, 1); + ret = applesmc_create_nodes(smc, accelerometer_group, 1); if (ret) goto out; - applesmc_idev = input_allocate_device(); - if (!applesmc_idev) { + smc->idev = input_allocate_device(); + if (!smc->idev) { ret = -ENOMEM; goto out_sysfs; } /* initial calibrate for the input device */ - applesmc_calibrate(); + applesmc_calibrate(smc); /* initialize the input device */ - applesmc_idev->name = "applesmc"; - applesmc_idev->id.bustype = BUS_HOST; - applesmc_idev->dev.parent = &pdev->dev; - input_set_abs_params(applesmc_idev, ABS_X, + smc->idev->name = "applesmc"; + smc->idev->id.bustype = BUS_HOST; + smc->idev->dev.parent = &smc->dev->dev; + input_set_abs_params(smc->idev, ABS_X, -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - input_set_abs_params(applesmc_idev, ABS_Y, + input_set_abs_params(smc->idev, ABS_Y, -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - ret = input_setup_polling(applesmc_idev, applesmc_idev_poll); + ret = input_setup_polling(smc->idev, applesmc_idev_poll); if (ret) goto out_idev; - input_set_poll_interval(applesmc_idev, APPLESMC_POLL_INTERVAL); + input_set_poll_interval(smc->idev, APPLESMC_POLL_INTERVAL); - ret = input_register_device(applesmc_idev); + ret = input_register_device(smc->idev); if (ret) goto out_idev; return 0; out_idev: - input_free_device(applesmc_idev); + input_free_device(smc->idev); out_sysfs: - applesmc_destroy_nodes(accelerometer_group); + applesmc_destroy_nodes(smc, accelerometer_group); out: pr_warn("driver init failed (ret=%d)!\n", ret); @@ -1225,44 +1760,55 @@ static int applesmc_create_accelerometer(void) } /* Release all resources used by the accelerometer */ -static void applesmc_release_accelerometer(void) +static void applesmc_release_accelerometer(struct applesmc_device *smc) { - if (!smcreg.has_accelerometer) + if (!smc->reg.has_accelerometer) return; - 
input_unregister_device(applesmc_idev); - applesmc_destroy_nodes(accelerometer_group); + input_unregister_device(smc->idev); + applesmc_destroy_nodes(smc, accelerometer_group); } -static int applesmc_create_light_sensor(void) +static int applesmc_create_light_sensor(struct applesmc_device *smc) { - if (!smcreg.num_light_sensors) + if (!smc->reg.num_light_sensors) return 0; - return applesmc_create_nodes(light_sensor_group, 1); + return applesmc_create_nodes(smc, light_sensor_group, 1); } -static void applesmc_release_light_sensor(void) +static void applesmc_release_light_sensor(struct applesmc_device *smc) { - if (!smcreg.num_light_sensors) + if (!smc->reg.num_light_sensors) return; - applesmc_destroy_nodes(light_sensor_group); + applesmc_destroy_nodes(smc, light_sensor_group); } -static int applesmc_create_key_backlight(void) +static int applesmc_create_key_backlight(struct applesmc_device *smc) { - if (!smcreg.has_key_backlight) + int ret; + + if (!smc->reg.has_key_backlight) return 0; - applesmc_led_wq = create_singlethread_workqueue("applesmc-led"); - if (!applesmc_led_wq) + smc->backlight_wq = create_singlethread_workqueue("applesmc-led"); + if (!smc->backlight_wq) return -ENOMEM; - return led_classdev_register(&pdev->dev, &applesmc_backlight); + + INIT_WORK(&smc->backlight_work, applesmc_backlight_set); + smc->backlight_dev.name = "smc::kbd_backlight"; + smc->backlight_dev.default_trigger = "nand-disk"; + smc->backlight_dev.brightness_set = applesmc_brightness_set; + ret = led_classdev_register(&smc->dev->dev, &smc->backlight_dev); + if (ret) + destroy_workqueue(smc->backlight_wq); + + return ret; } -static void applesmc_release_key_backlight(void) +static void applesmc_release_key_backlight(struct applesmc_device *smc) { - if (!smcreg.has_key_backlight) + if (!smc->reg.has_key_backlight) return; - led_classdev_unregister(&applesmc_backlight); - destroy_workqueue(applesmc_led_wq); + led_classdev_unregister(&smc->backlight_dev); + destroy_workqueue(smc->backlight_wq); } static int applesmc_dmi_match(const struct dmi_system_id *id) @@ -1291,6 +1837,10 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "Macmini") }, }, + { applesmc_dmi_match, "Apple iMacPro", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "iMacPro") }, + }, { applesmc_dmi_match, "Apple MacPro", { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "MacPro") }, @@ -1306,90 +1856,91 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = { { .ident = NULL } }; -static int __init applesmc_init(void) +static int applesmc_create_modules(struct applesmc_device *smc) { int ret; - if (!dmi_check_system(applesmc_whitelist)) { - pr_warn("supported laptop not found!\n"); - ret = -ENODEV; - goto out; - } - - if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS, - "applesmc")) { - ret = -ENXIO; - goto out; - } - - ret = platform_driver_register(&applesmc_driver); - if (ret) - goto out_region; - - pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT, - NULL, 0); - if (IS_ERR(pdev)) { - ret = PTR_ERR(pdev); - goto out_driver; - } - - /* create register cache */ - ret = applesmc_init_smcreg(); + ret = applesmc_create_nodes(smc, info_group, 1); if (ret) - goto out_device; - - ret = applesmc_create_nodes(info_group, 1); + goto out; + ret = applesmc_create_nodes(smc, BCLM_group, 1); if (ret) - goto out_smcreg; + goto out_info; - ret = applesmc_create_nodes(fan_group, 
smcreg.fan_count); + ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count); if (ret) - goto out_info; + goto out_bclm; - ret = applesmc_create_nodes(temp_group, smcreg.index_count); + ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count); if (ret) goto out_fans; - ret = applesmc_create_accelerometer(); + ret = applesmc_create_accelerometer(smc); if (ret) goto out_temperature; - ret = applesmc_create_light_sensor(); + ret = applesmc_create_light_sensor(smc); if (ret) goto out_accelerometer; - ret = applesmc_create_key_backlight(); + ret = applesmc_create_key_backlight(smc); if (ret) goto out_light_sysfs; - hwmon_dev = hwmon_device_register(&pdev->dev); - if (IS_ERR(hwmon_dev)) { - ret = PTR_ERR(hwmon_dev); + smc->hwmon_dev = hwmon_device_register(&smc->dev->dev); + if (IS_ERR(smc->hwmon_dev)) { + ret = PTR_ERR(smc->hwmon_dev); goto out_light_ledclass; } return 0; out_light_ledclass: - applesmc_release_key_backlight(); + applesmc_release_key_backlight(smc); out_light_sysfs: - applesmc_release_light_sensor(); + applesmc_release_light_sensor(smc); out_accelerometer: - applesmc_release_accelerometer(); + applesmc_release_accelerometer(smc); out_temperature: - applesmc_destroy_nodes(temp_group); + applesmc_destroy_nodes(smc, temp_group); out_fans: - applesmc_destroy_nodes(fan_group); + applesmc_destroy_nodes(smc, fan_group); +out_bclm: + applesmc_destroy_nodes(smc, BCLM_group); out_info: - applesmc_destroy_nodes(info_group); -out_smcreg: - applesmc_destroy_smcreg(); -out_device: - platform_device_unregister(pdev); -out_driver: - platform_driver_unregister(&applesmc_driver); -out_region: - release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); + applesmc_destroy_nodes(smc, info_group); +out: + return ret; +} + +static void applesmc_destroy_modules(struct applesmc_device *smc) +{ + hwmon_device_unregister(smc->hwmon_dev); + applesmc_release_key_backlight(smc); + applesmc_release_light_sensor(smc); + applesmc_release_accelerometer(smc); + applesmc_destroy_nodes(smc, temp_group); + applesmc_destroy_nodes(smc, fan_group); + applesmc_destroy_nodes(smc, BCLM_group); + applesmc_destroy_nodes(smc, info_group); +} + +static int __init applesmc_init(void) +{ + int ret; + + if (!dmi_check_system(applesmc_whitelist)) { + pr_warn("supported laptop not found!\n"); + ret = -ENODEV; + goto out; + } + + ret = acpi_bus_register_driver(&applesmc_driver); + if (ret) + goto out; + + return 0; + out: pr_warn("driver init failed (ret=%d)!\n", ret); return ret; @@ -1397,23 +1948,14 @@ static int __init applesmc_init(void) static void __exit applesmc_exit(void) { - hwmon_device_unregister(hwmon_dev); - applesmc_release_key_backlight(); - applesmc_release_light_sensor(); - applesmc_release_accelerometer(); - applesmc_destroy_nodes(temp_group); - applesmc_destroy_nodes(fan_group); - applesmc_destroy_nodes(info_group); - applesmc_destroy_smcreg(); - platform_device_unregister(pdev); - platform_driver_unregister(&applesmc_driver); - release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); + acpi_bus_unregister_driver(&applesmc_driver); } module_init(applesmc_init); module_exit(applesmc_exit); MODULE_AUTHOR("Nicolas Boichat"); +MODULE_AUTHOR("Paul Pawlowski"); MODULE_DESCRIPTION("Apple SMC"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(dmi, applesmc_whitelist); diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index baa242b14099..d89a00080046 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -143,6 +143,7 @@ void vga_set_default_device(struct pci_dev *pdev) pci_dev_put(vga_default); 
vga_default = pci_dev_get(pdev);
 }
+EXPORT_SYMBOL_GPL(vga_set_default_device);
 
 /**
  * vga_remove_vgacon - deactivate VGA console
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 1417e230edbd..e69785af8e1d 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -107,6 +108,10 @@ struct apple_gmux_config {
 
 # define MMIO_GMUX_MAX_BRIGHTNESS	0xffff
 
+static bool force_igd;
+module_param(force_igd, bool, 0);
+MODULE_PARM_DESC(force_igd, "Switch the GPU to the iGPU on module load. Make sure that you have apple-set-os set up and that the iGPU is present in `lspci -s 00:02.0`. (default: false)");
+
 static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port)
 {
 	return inb(gmux_data->iostart + port);
@@ -945,6 +950,19 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 	gmux_enable_interrupts(gmux_data);
 	gmux_read_switch_state(gmux_data);
 
+	if (force_igd) {
+		struct pci_dev *pdev;
+
+		pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(2, 0));
+		if (pdev) {
+			pr_info("Switching to IGD\n");
+			gmux_switchto(VGA_SWITCHEROO_IGD);
+			vga_set_default_device(pdev);
+		} else {
+			pr_err("force_igd is true, but couldn't find iGPU at 00:02.0! Is apple-set-os working?\n");
+		}
+	}
+
 	/*
 	 * Retina MacBook Pros cannot switch the panel's AUX separately
 	 * and need eDP pre-calibration. They are distinguishable from
diff --git a/drivers/soc/apple/Kconfig b/drivers/soc/apple/Kconfig
index ad6736889231..90d1a448f1f4 100644
--- a/drivers/soc/apple/Kconfig
+++ b/drivers/soc/apple/Kconfig
@@ -4,6 +4,16 @@ if ARCH_APPLE || COMPILE_TEST
 
 menu "Apple SoC drivers"
 
+config APPLE_DOCKCHANNEL
+	tristate "Apple DockChannel FIFO"
+	depends on ARCH_APPLE || COMPILE_TEST
+	default ARCH_APPLE
+	help
+	  DockChannel is a simple FIFO used on Apple SoCs for debug and
+	  inter-processor communications.
+
+	  Say 'y' here if you have an Apple SoC.
+
 config APPLE_MAILBOX
 	tristate "Apple SoC mailboxes"
 	depends on PM
@@ -28,6 +38,20 @@ config APPLE_RTKIT
 
 	  Say 'y' here if you have an Apple SoC.
 
+config APPLE_RTKIT_HELPER
+	tristate "Apple Generic RTKit helper co-processor"
+	depends on APPLE_RTKIT
+	depends on ARCH_APPLE || COMPILE_TEST
+	default ARCH_APPLE
+	help
+	  Apple SoCs such as the M1 come with various co-processors running
+	  their proprietary RTKit operating system. This option enables support
+	  for a generic co-processor that does not implement any additional
+	  in-band communications. It can be used for testing purposes, or for
+	  coprocessors such as MTP that communicate over a different interface.
+
+	  Say 'y' here if you have an Apple SoC.
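
A minimal consumer of the DockChannel FIFO added above might look like the sketch
below. The dockchannel_init()/dockchannel_send()/dockchannel_recv() signatures
match drivers/soc/apple/dockchannel.c later in this series; the header path, the
caller's driver, and the "ping" payload are illustrative assumptions only:

	/*
	 * Sketch only, not part of this series. Assumes the API is exported
	 * through a public header (path assumed, e.g. linux/soc/apple/dockchannel.h).
	 */
	#include <linux/err.h>
	#include <linux/platform_device.h>

	static int example_dc_probe(struct platform_device *pdev)
	{
		struct dockchannel *dc;
		u8 reply[4];
		int ret;

		/* Maps the "config"/"data" regions and requests the tx/rx IRQs. */
		dc = dockchannel_init(pdev);
		if (IS_ERR(dc))
			return PTR_ERR(dc);

		/* Returns the byte count on success, or -ETIMEDOUT. */
		ret = dockchannel_send(dc, "ping", 4);
		if (ret < 0)
			return ret;

		/* Blocks (with timeout) until 4 bytes have been drained. */
		ret = dockchannel_recv(dc, reply, sizeof(reply));
		return ret < 0 ? ret : 0;
	}
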
+ config APPLE_SART tristate "Apple SART DMA address filter" depends on ARCH_APPLE || COMPILE_TEST diff --git a/drivers/soc/apple/Makefile b/drivers/soc/apple/Makefile index 4d9ab8f3037b..5e526a9edcf2 100644 --- a/drivers/soc/apple/Makefile +++ b/drivers/soc/apple/Makefile @@ -1,10 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_APPLE_DOCKCHANNEL) += apple-dockchannel.o +apple-dockchannel-y = dockchannel.o + obj-$(CONFIG_APPLE_MAILBOX) += apple-mailbox.o apple-mailbox-y = mailbox.o obj-$(CONFIG_APPLE_RTKIT) += apple-rtkit.o apple-rtkit-y = rtkit.o rtkit-crashlog.o +obj-$(CONFIG_APPLE_RTKIT_HELPER) += apple-rtkit-helper.o +apple-rtkit-helper-y = rtkit-helper.o + obj-$(CONFIG_APPLE_SART) += apple-sart.o apple-sart-y = sart.o diff --git a/drivers/soc/apple/dockchannel.c b/drivers/soc/apple/dockchannel.c new file mode 100644 index 000000000000..3a0d7964007c --- /dev/null +++ b/drivers/soc/apple/dockchannel.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Apple DockChannel FIFO driver + * Copyright The Asahi Linux Contributors + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DOCKCHANNEL_MAX_IRQ 32 + +#define DOCKCHANNEL_TX_TIMEOUT_MS 1000 +#define DOCKCHANNEL_RX_TIMEOUT_MS 1000 + +#define IRQ_MASK 0x0 +#define IRQ_FLAG 0x4 + +#define IRQ_TX BIT(0) +#define IRQ_RX BIT(1) + +#define CONFIG_TX_THRESH 0x0 +#define CONFIG_RX_THRESH 0x4 + +#define DATA_TX8 0x4 +#define DATA_TX16 0x8 +#define DATA_TX24 0xc +#define DATA_TX32 0x10 +#define DATA_TX_FREE 0x14 +#define DATA_RX8 0x1c +#define DATA_RX16 0x20 +#define DATA_RX24 0x24 +#define DATA_RX32 0x28 +#define DATA_RX_COUNT 0x2c + +struct dockchannel { + struct device *dev; + int tx_irq; + int rx_irq; + + void __iomem *config_base; + void __iomem *data_base; + + u32 fifo_size; + bool awaiting; + struct completion tx_comp; + struct completion rx_comp; + + void *cookie; + void (*data_available)(void *cookie, size_t avail); +}; + +struct dockchannel_common { + struct device *dev; + struct irq_domain *domain; + int irq; + + void __iomem *irq_base; +}; + +/* Dockchannel FIFO functions */ + +static irqreturn_t dockchannel_tx_irq(int irq, void *data) +{ + struct dockchannel *dockchannel = data; + + disable_irq_nosync(irq); + complete(&dockchannel->tx_comp); + + return IRQ_HANDLED; +} + +static irqreturn_t dockchannel_rx_irq(int irq, void *data) +{ + struct dockchannel *dockchannel = data; + + disable_irq_nosync(irq); + + if (dockchannel->awaiting) { + return IRQ_WAKE_THREAD; + } else { + complete(&dockchannel->rx_comp); + return IRQ_HANDLED; + } +} + +static irqreturn_t dockchannel_rx_irq_thread(int irq, void *data) +{ + struct dockchannel *dockchannel = data; + size_t avail = readl_relaxed(dockchannel->data_base + DATA_RX_COUNT); + + dockchannel->awaiting = false; + dockchannel->data_available(dockchannel->cookie, avail); + + return IRQ_HANDLED; +} + +int dockchannel_send(struct dockchannel *dockchannel, const void *buf, size_t count) +{ + size_t left = count; + const u8 *p = buf; + + while (left > 0) { + size_t avail = readl_relaxed(dockchannel->data_base + DATA_TX_FREE); + size_t block = min(left, avail); + + if (avail == 0) { + size_t threshold = min((size_t)(dockchannel->fifo_size / 2), left); + + writel_relaxed(threshold, dockchannel->config_base + CONFIG_TX_THRESH); + reinit_completion(&dockchannel->tx_comp); + enable_irq(dockchannel->tx_irq); + + if (!wait_for_completion_timeout(&dockchannel->tx_comp, + 
msecs_to_jiffies(DOCKCHANNEL_TX_TIMEOUT_MS))) { + disable_irq(dockchannel->tx_irq); + return -ETIMEDOUT; + } + + continue; + } + + while (block >= 4) { + writel_relaxed(get_unaligned_le32(p), dockchannel->data_base + DATA_TX32); + p += 4; + left -= 4; + block -= 4; + } + while (block > 0) { + writeb_relaxed(*p++, dockchannel->data_base + DATA_TX8); + left--; + block--; + } + } + + return count; +} +EXPORT_SYMBOL(dockchannel_send); + +int dockchannel_recv(struct dockchannel *dockchannel, void *buf, size_t count) +{ + size_t left = count; + u8 *p = buf; + + while (left > 0) { + size_t avail = readl_relaxed(dockchannel->data_base + DATA_RX_COUNT); + size_t block = min(left, avail); + + if (avail == 0) { + size_t threshold = min((size_t)(dockchannel->fifo_size / 2), left); + + writel_relaxed(threshold, dockchannel->config_base + CONFIG_RX_THRESH); + reinit_completion(&dockchannel->rx_comp); + enable_irq(dockchannel->rx_irq); + + if (!wait_for_completion_timeout(&dockchannel->rx_comp, + msecs_to_jiffies(DOCKCHANNEL_RX_TIMEOUT_MS))) { + disable_irq(dockchannel->rx_irq); + return -ETIMEDOUT; + } + + continue; + } + + while (block >= 4) { + put_unaligned_le32(readl_relaxed(dockchannel->data_base + DATA_RX32), p); + p += 4; + left -= 4; + block -= 4; + } + while (block > 0) { + *p++ = readl_relaxed(dockchannel->data_base + DATA_RX8) >> 8; + left--; + block--; + } + } + + return count; +} +EXPORT_SYMBOL(dockchannel_recv); + +int dockchannel_await(struct dockchannel *dockchannel, + void (*callback)(void *cookie, size_t avail), + void *cookie, size_t count) +{ + size_t threshold = min((size_t)dockchannel->fifo_size, count); + + if (!count) { + dockchannel->awaiting = false; + disable_irq(dockchannel->rx_irq); + return 0; + } + + dockchannel->data_available = callback; + dockchannel->cookie = cookie; + dockchannel->awaiting = true; + writel_relaxed(threshold, dockchannel->config_base + CONFIG_RX_THRESH); + enable_irq(dockchannel->rx_irq); + + return threshold; +} +EXPORT_SYMBOL(dockchannel_await); + +struct dockchannel *dockchannel_init(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dockchannel *dockchannel; + int ret; + + dockchannel = devm_kzalloc(dev, sizeof(*dockchannel), GFP_KERNEL); + if (!dockchannel) + return ERR_PTR(-ENOMEM); + + dockchannel->dev = dev; + dockchannel->config_base = devm_platform_ioremap_resource_byname(pdev, "config"); + if (IS_ERR(dockchannel->config_base)) + return (__force void *)dockchannel->config_base; + + dockchannel->data_base = devm_platform_ioremap_resource_byname(pdev, "data"); + if (IS_ERR(dockchannel->data_base)) + return (__force void *)dockchannel->data_base; + + ret = of_property_read_u32(dev->of_node, "apple,fifo-size", &dockchannel->fifo_size); + if (ret) + return ERR_PTR(dev_err_probe(dev, ret, "Missing apple,fifo-size property")); + + init_completion(&dockchannel->tx_comp); + init_completion(&dockchannel->rx_comp); + + dockchannel->tx_irq = platform_get_irq_byname(pdev, "tx"); + if (dockchannel->tx_irq <= 0) { + return ERR_PTR(dev_err_probe(dev, dockchannel->tx_irq, + "Failed to get TX IRQ")); + } + + dockchannel->rx_irq = platform_get_irq_byname(pdev, "rx"); + if (dockchannel->rx_irq <= 0) { + return ERR_PTR(dev_err_probe(dev, dockchannel->rx_irq, + "Failed to get RX IRQ")); + } + + ret = devm_request_irq(dev, dockchannel->tx_irq, dockchannel_tx_irq, IRQF_NO_AUTOEN, + "apple-dockchannel-tx", dockchannel); + if (ret) + return ERR_PTR(dev_err_probe(dev, ret, "Failed to request TX IRQ")); + + ret = devm_request_threaded_irq(dev, 
dockchannel->rx_irq, dockchannel_rx_irq, + dockchannel_rx_irq_thread, IRQF_NO_AUTOEN, + "apple-dockchannel-rx", dockchannel); + if (ret) + return ERR_PTR(dev_err_probe(dev, ret, "Failed to request RX IRQ")); + + return dockchannel; +} +EXPORT_SYMBOL(dockchannel_init); + + +/* Dockchannel IRQchip */ + +static void dockchannel_irq(struct irq_desc *desc) +{ + unsigned int irq = irq_desc_get_irq(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + struct dockchannel_common *dcc = irq_get_handler_data(irq); + unsigned long flags = readl_relaxed(dcc->irq_base + IRQ_FLAG); + int bit; + + chained_irq_enter(chip, desc); + + for_each_set_bit(bit, &flags, DOCKCHANNEL_MAX_IRQ) + generic_handle_domain_irq(dcc->domain, bit); + + chained_irq_exit(chip, desc); +} + +static void dockchannel_irq_ack(struct irq_data *data) +{ + struct dockchannel_common *dcc = irq_data_get_irq_chip_data(data); + unsigned int hwirq = data->hwirq; + + writel_relaxed(BIT(hwirq), dcc->irq_base + IRQ_FLAG); +} + +static void dockchannel_irq_mask(struct irq_data *data) +{ + struct dockchannel_common *dcc = irq_data_get_irq_chip_data(data); + unsigned int hwirq = data->hwirq; + u32 val = readl_relaxed(dcc->irq_base + IRQ_MASK); + + writel_relaxed(val & ~BIT(hwirq), dcc->irq_base + IRQ_MASK); +} + +static void dockchannel_irq_unmask(struct irq_data *data) +{ + struct dockchannel_common *dcc = irq_data_get_irq_chip_data(data); + unsigned int hwirq = data->hwirq; + u32 val = readl_relaxed(dcc->irq_base + IRQ_MASK); + + writel_relaxed(val | BIT(hwirq), dcc->irq_base + IRQ_MASK); +} + +static const struct irq_chip dockchannel_irqchip = { + .name = "dockchannel-irqc", + .irq_ack = dockchannel_irq_ack, + .irq_mask = dockchannel_irq_mask, + .irq_unmask = dockchannel_irq_unmask, +}; + +static int dockchannel_irq_domain_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, d->host_data); + irq_set_chip_and_handler(virq, &dockchannel_irqchip, handle_level_irq); + + return 0; +} + +static const struct irq_domain_ops dockchannel_irq_domain_ops = { + .xlate = irq_domain_xlate_twocell, + .map = dockchannel_irq_domain_map, +}; + +static int dockchannel_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dockchannel_common *dcc; + struct device_node *child; + + dcc = devm_kzalloc(dev, sizeof(*dcc), GFP_KERNEL); + if (!dcc) + return -ENOMEM; + + dcc->dev = dev; + platform_set_drvdata(pdev, dcc); + + dcc->irq_base = devm_platform_ioremap_resource_byname(pdev, "irq"); + if (IS_ERR(dcc->irq_base)) + return PTR_ERR(dcc->irq_base); + + writel_relaxed(0, dcc->irq_base + IRQ_MASK); + writel_relaxed(~0, dcc->irq_base + IRQ_FLAG); + + dcc->domain = irq_domain_add_linear(dev->of_node, DOCKCHANNEL_MAX_IRQ, + &dockchannel_irq_domain_ops, dcc); + if (!dcc->domain) + return -ENOMEM; + + dcc->irq = platform_get_irq(pdev, 0); + if (dcc->irq <= 0) + return dev_err_probe(dev, dcc->irq, "Failed to get IRQ"); + + irq_set_handler_data(dcc->irq, dcc); + irq_set_chained_handler(dcc->irq, dockchannel_irq); + + for_each_child_of_node(dev->of_node, child) + of_platform_device_create(child, NULL, dev); + + return 0; +} + +static void dockchannel_remove(struct platform_device *pdev) +{ + struct dockchannel_common *dcc = platform_get_drvdata(pdev); + int hwirq; + + device_for_each_child(&pdev->dev, NULL, of_platform_device_destroy); + + irq_set_chained_handler_and_data(dcc->irq, NULL, NULL); + + for (hwirq = 0; hwirq < DOCKCHANNEL_MAX_IRQ; hwirq++) + irq_dispose_mapping(irq_find_mapping(dcc->domain, 
hwirq)); + + irq_domain_remove(dcc->domain); + + writel_relaxed(0, dcc->irq_base + IRQ_MASK); + writel_relaxed(~0, dcc->irq_base + IRQ_FLAG); +} + +static const struct of_device_id dockchannel_of_match[] = { + { .compatible = "apple,dockchannel" }, + {}, +}; +MODULE_DEVICE_TABLE(of, dockchannel_of_match); + +static struct platform_driver dockchannel_driver = { + .driver = { + .name = "dockchannel", + .of_match_table = dockchannel_of_match, + }, + .probe = dockchannel_probe, + .remove = dockchannel_remove, +}; +module_platform_driver(dockchannel_driver); + +MODULE_AUTHOR("Hector Martin "); +MODULE_LICENSE("Dual MIT/GPL"); +MODULE_DESCRIPTION("Apple DockChannel driver"); diff --git a/drivers/soc/apple/rtkit-helper.c b/drivers/soc/apple/rtkit-helper.c new file mode 100644 index 000000000000..080d083ed9bd --- /dev/null +++ b/drivers/soc/apple/rtkit-helper.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Apple Generic RTKit helper coprocessor + * Copyright The Asahi Linux Contributors + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define APPLE_ASC_CPU_CONTROL 0x44 +#define APPLE_ASC_CPU_CONTROL_RUN BIT(4) + +struct apple_rtkit_helper { + struct device *dev; + struct apple_rtkit *rtk; + + void __iomem *asc_base; + + struct resource *sram; + void __iomem *sram_base; +}; + +static int apple_rtkit_helper_shmem_setup(void *cookie, struct apple_rtkit_shmem *bfr) +{ + struct apple_rtkit_helper *helper = cookie; + struct resource res = { + .start = bfr->iova, + .end = bfr->iova + bfr->size - 1, + .name = "rtkit_map", + }; + + if (!bfr->iova) { + bfr->buffer = dma_alloc_coherent(helper->dev, bfr->size, + &bfr->iova, GFP_KERNEL); + if (!bfr->buffer) + return -ENOMEM; + return 0; + } + + if (!helper->sram) { + dev_err(helper->dev, + "RTKit buffer request with no SRAM region: %pR", &res); + return -EFAULT; + } + + res.flags = helper->sram->flags; + + if (res.end < res.start || !resource_contains(helper->sram, &res)) { + dev_err(helper->dev, + "RTKit buffer request outside SRAM region: %pR", &res); + return -EFAULT; + } + + bfr->iomem = helper->sram_base + (res.start - helper->sram->start); + bfr->is_mapped = true; + + return 0; +} + +static void apple_rtkit_helper_shmem_destroy(void *cookie, struct apple_rtkit_shmem *bfr) +{ + // no-op +} + +static const struct apple_rtkit_ops apple_rtkit_helper_ops = { + .shmem_setup = apple_rtkit_helper_shmem_setup, + .shmem_destroy = apple_rtkit_helper_shmem_destroy, +}; + +static int apple_rtkit_helper_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct apple_rtkit_helper *helper; + int ret; + + /* 44 bits for addresses in standard RTKit requests */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); + if (ret) + return ret; + + helper = devm_kzalloc(dev, sizeof(*helper), GFP_KERNEL); + if (!helper) + return -ENOMEM; + + helper->dev = dev; + platform_set_drvdata(pdev, helper); + + helper->asc_base = devm_platform_ioremap_resource_byname(pdev, "asc"); + if (IS_ERR(helper->asc_base)) + return PTR_ERR(helper->asc_base); + + helper->sram = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram"); + if (helper->sram) { + helper->sram_base = devm_ioremap_resource(dev, helper->sram); + if (IS_ERR(helper->sram_base)) + return dev_err_probe(dev, PTR_ERR(helper->sram_base), + "Failed to map SRAM region"); + } + + helper->rtk = + devm_apple_rtkit_init(dev, helper, NULL, 0, &apple_rtkit_helper_ops); + if (IS_ERR(helper->rtk)) + return dev_err_probe(dev, 
PTR_ERR(helper->rtk),
+				     "Failed to initialize RTKit");
+
+	writel_relaxed(APPLE_ASC_CPU_CONTROL_RUN,
+		       helper->asc_base + APPLE_ASC_CPU_CONTROL);
+
+	/* Works for both wake and boot */
+	ret = apple_rtkit_wake(helper->rtk);
+	if (ret != 0)
+		return dev_err_probe(dev, ret, "Failed to wake up coprocessor");
+
+	return 0;
+}
+
+static void apple_rtkit_helper_remove(struct platform_device *pdev)
+{
+	struct apple_rtkit_helper *helper = platform_get_drvdata(pdev);
+
+	if (apple_rtkit_is_running(helper->rtk))
+		apple_rtkit_quiesce(helper->rtk);
+
+	writel_relaxed(0, helper->asc_base + APPLE_ASC_CPU_CONTROL);
+}
+
+static const struct of_device_id apple_rtkit_helper_of_match[] = {
+	{ .compatible = "apple,rtk-helper-asc4" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, apple_rtkit_helper_of_match);
+
+static struct platform_driver apple_rtkit_helper_driver = {
+	.driver = {
+		.name = "rtkit-helper",
+		.of_match_table = apple_rtkit_helper_of_match,
+	},
+	.probe = apple_rtkit_helper_probe,
+	.remove = apple_rtkit_helper_remove,
+};
+module_platform_driver(apple_rtkit_helper_driver);
+
+MODULE_AUTHOR("Hector Martin ");
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_DESCRIPTION("Apple RTKit helper driver");
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 2f92cd698bef..cefe34038c57 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -48,4 +48,6 @@ source "drivers/staging/axis-fifo/Kconfig"
 
 source "drivers/staging/vme_user/Kconfig"
 
+source "drivers/staging/apple-bce/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index f5b8876aa536..cf24b596cde9 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_MOST) += most/
 obj-$(CONFIG_GREYBUS) += greybus/
 obj-$(CONFIG_BCM2835_VCHIQ) += vc04_services/
 obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/
+obj-$(CONFIG_APPLE_BCE) += apple-bce/
diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig
new file mode 100644
index 000000000000..fe92bc441e89
--- /dev/null
+++ b/drivers/staging/apple-bce/Kconfig
@@ -0,0 +1,18 @@
+config APPLE_BCE
+	tristate "Apple BCE driver (VHCI and Audio support)"
+	default m
+	depends on X86
+	select SOUND
+	select SND
+	select SND_PCM
+	select SND_JACK
+	help
+	  VHCI and audio support on Apple MacBooks with the T2 chip.
+	  This driver is divided into three components:
+	  - BCE (Buffer Copy Engine): establishes a basic communication
+	    channel with the T2 chip. This component is required by the other two:
+	  - VHCI (Virtual Host Controller Interface): access to the keyboard, mouse
+	    and other system devices depends on this virtual USB host controller.
+	  - Audio: a driver for the T2 audio interface.
+
+	  If "M" is selected, the module will be called apple-bce.
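
All three apple-bce components funnel through the Buffer Copy Engine's 64-bit
doorbell mailbox, so the calling convention is worth a sketch before the code.
The example below mirrors bce_fw_version_handshake() in apple_bce.c further
down; the exact bit layout of BCE_MB_MSG()/BCE_MB_TYPE()/BCE_MB_VALUE() lives
in the driver's mailbox.h (not shown in this excerpt) and is assumed, not
defined, by this sketch:

	/* Sketch of a BCE mailbox round-trip (illustrative only). */
	static int example_bce_handshake(struct apple_bce_device *bce)
	{
		u64 reply;
		int ret;

		/* Pack a message type and its payload into one 64-bit doorbell value. */
		ret = bce_mailbox_send(&bce->mbox,
				       BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION,
						  BC_PROTOCOL_VERSION),
				       &reply);
		if (ret)
			return ret;

		/* The reply echoes the type; the value carries the firmware's answer. */
		if (BCE_MB_TYPE(reply) != BCE_MB_SET_FW_PROTOCOL_VERSION ||
		    BCE_MB_VALUE(reply) != BC_PROTOCOL_VERSION)
			return -EINVAL;

		return 0;
	}
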
diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile
new file mode 100644
index 000000000000..8cfbd3f64af6
--- /dev/null
+++ b/drivers/staging/apple-bce/Makefile
@@ -0,0 +1,28 @@
+modname := apple-bce
+obj-$(CONFIG_APPLE_BCE) += $(modname).o
+
+apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o
+
+MY_CFLAGS += -DWITHOUT_NVME_PATCH
+#MY_CFLAGS += -g -DDEBUG
+ccflags-y += ${MY_CFLAGS}
+CC += ${MY_CFLAGS}
+
+KVERSION := $(KERNELRELEASE)
+ifeq ($(origin KERNELRELEASE), undefined)
+KVERSION := $(shell uname -r)
+endif
+
+KDIR := /lib/modules/$(KVERSION)/build
+PWD := $(shell pwd)
+
+.PHONY: all
+
+all:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
+
+install:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules_install
diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c
new file mode 100644
index 000000000000..4fd2415d7028
--- /dev/null
+++ b/drivers/staging/apple-bce/apple_bce.c
@@ -0,0 +1,445 @@
+#include "apple_bce.h"
+#include
+#include
+#include "audio/audio.h"
+#include
+
+static dev_t bce_chrdev;
+static struct class *bce_class;
+
+struct apple_bce_device *global_bce;
+
+static int bce_create_command_queues(struct apple_bce_device *bce);
+static void bce_free_command_queues(struct apple_bce_device *bce);
+static irqreturn_t bce_handle_mb_irq(int irq, void *dev);
+static irqreturn_t bce_handle_dma_irq(int irq, void *dev);
+static int bce_fw_version_handshake(struct apple_bce_device *bce);
+static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq);
+
+static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct apple_bce_device *bce = NULL;
+	int status = 0;
+	int nvec;
+
+	pr_info("apple-bce: capturing our device\n");
+
+	if (pci_enable_device(dev))
+		return -ENODEV;
+	if (pci_request_regions(dev, "apple-bce")) {
+		status = -ENODEV;
+		goto fail;
+	}
+	pci_set_master(dev);
+	nvec = pci_alloc_irq_vectors(dev, 1, 8, PCI_IRQ_MSI);
+	if (nvec < 5) {
+		status = -EINVAL;
+		goto fail;
+	}
+
+	bce = kzalloc(sizeof(struct apple_bce_device), GFP_KERNEL);
+	if (!bce) {
+		status = -ENOMEM;
+		goto fail;
+	}
+
+	bce->pci = dev;
+	pci_set_drvdata(dev, bce);
+
+	bce->devt = bce_chrdev;
+	bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce");
+	if (IS_ERR_OR_NULL(bce->dev)) {
+		status = PTR_ERR(bce->dev);
+		goto fail;
+	}
+
+	bce->reg_mem_mb = pci_iomap(dev, 4, 0);
+	bce->reg_mem_dma = pci_iomap(dev, 2, 0);
+
+	if (IS_ERR_OR_NULL(bce->reg_mem_mb) || IS_ERR_OR_NULL(bce->reg_mem_dma)) {
+		dev_warn(&dev->dev, "apple-bce: Failed to pci_iomap required regions\n");
+		goto fail;
+	}
+
+	bce_mailbox_init(&bce->mbox, bce->reg_mem_mb);
+	bce_timestamp_init(&bce->timestamp, bce->reg_mem_mb);
+
+	spin_lock_init(&bce->queues_lock);
+	ida_init(&bce->queue_ida);
+
+	if ((status = pci_request_irq(dev, 0, bce_handle_mb_irq, NULL, dev, "bce_mbox")))
+		goto fail;
+	if ((status = pci_request_irq(dev, 4, NULL, bce_handle_dma_irq, dev, "bce_dma")))
+		goto fail_interrupt_0;
+
+	if ((status = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(37)))) {
+		dev_warn(&dev->dev, "dma: Setting mask failed\n");
+		goto fail_interrupt;
+	}
+
+	/* Get function 0's interface. This is needed because Apple only accepts DMA on our function if function 0
+	   is a bus master, so we need to work around this.
*/ + bce->pci0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); +#ifndef WITHOUT_NVME_PATCH + if ((status = pci_enable_device_mem(bce->pci0))) { + dev_warn(&dev->dev, "apple-bce: failed to enable function 0\n"); + goto fail_dev0; + } +#endif + pci_set_master(bce->pci0); + + bce_timestamp_start(&bce->timestamp, true); + + if ((status = bce_fw_version_handshake(bce))) + goto fail_ts; + pr_info("apple-bce: handshake done\n"); + + if ((status = bce_create_command_queues(bce))) { + pr_info("apple-bce: Creating command queues failed\n"); + goto fail_ts; + } + + global_bce = bce; + + bce_vhci_create(bce, &bce->vhci); + + return 0; + +fail_ts: + bce_timestamp_stop(&bce->timestamp); +#ifndef WITHOUT_NVME_PATCH + pci_disable_device(bce->pci0); +fail_dev0: +#endif + pci_dev_put(bce->pci0); +fail_interrupt: + pci_free_irq(dev, 4, dev); +fail_interrupt_0: + pci_free_irq(dev, 0, dev); +fail: + if (bce && bce->dev) { + device_destroy(bce_class, bce->devt); + + if (!IS_ERR_OR_NULL(bce->reg_mem_mb)) + pci_iounmap(dev, bce->reg_mem_mb); + if (!IS_ERR_OR_NULL(bce->reg_mem_dma)) + pci_iounmap(dev, bce->reg_mem_dma); + + kfree(bce); + } + + pci_free_irq_vectors(dev); + pci_release_regions(dev); + pci_disable_device(dev); + + if (!status) + status = -EINVAL; + return status; +} + +static int bce_create_command_queues(struct apple_bce_device *bce) +{ + int status; + struct bce_queue_memcfg *cfg; + + bce->cmd_cq = bce_alloc_cq(bce, 0, 0x20); + bce->cmd_cmdq = bce_alloc_cmdq(bce, 1, 0x20); + if (bce->cmd_cq == NULL || bce->cmd_cmdq == NULL) { + status = -ENOMEM; + goto err; + } + bce->queues[0] = (struct bce_queue *) bce->cmd_cq; + bce->queues[1] = (struct bce_queue *) bce->cmd_cmdq->sq; + + cfg = kzalloc(sizeof(struct bce_queue_memcfg), GFP_KERNEL); + if (!cfg) { + status = -ENOMEM; + goto err; + } + bce_get_cq_memcfg(bce->cmd_cq, cfg); + if ((status = bce_register_command_queue(bce, cfg, false))) + goto err; + bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg); + if ((status = bce_register_command_queue(bce, cfg, true))) + goto err; + kfree(cfg); + + return 0; + +err: + if (bce->cmd_cq) + bce_free_cq(bce, bce->cmd_cq); + if (bce->cmd_cmdq) + bce_free_cmdq(bce, bce->cmd_cmdq); + return status; +} + +static void bce_free_command_queues(struct apple_bce_device *bce) +{ + bce_free_cq(bce, bce->cmd_cq); + bce_free_cmdq(bce, bce->cmd_cmdq); + bce->cmd_cq = NULL; + bce->queues[0] = NULL; +} + +static irqreturn_t bce_handle_mb_irq(int irq, void *dev) +{ + struct apple_bce_device *bce = pci_get_drvdata(dev); + bce_mailbox_handle_interrupt(&bce->mbox); + return IRQ_HANDLED; +} + +static irqreturn_t bce_handle_dma_irq(int irq, void *dev) +{ + int i; + struct apple_bce_device *bce = pci_get_drvdata(dev); + spin_lock(&bce->queues_lock); + for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++) + if (bce->queues[i] && bce->queues[i]->type == BCE_QUEUE_CQ) + bce_handle_cq_completions(bce, (struct bce_queue_cq *) bce->queues[i]); + spin_unlock(&bce->queues_lock); + return IRQ_HANDLED; +} + +static int bce_fw_version_handshake(struct apple_bce_device *bce) +{ + u64 result; + int status; + + if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION, BC_PROTOCOL_VERSION), + &result))) + return status; + if (BCE_MB_TYPE(result) != BCE_MB_SET_FW_PROTOCOL_VERSION || + BCE_MB_VALUE(result) != BC_PROTOCOL_VERSION) { + pr_err("apple-bce: FW version handshake failed %x:%llx\n", BCE_MB_TYPE(result), BCE_MB_VALUE(result)); + return -EINVAL; + } + return 0; +} + +static int bce_register_command_queue(struct 
apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq)
+{
+	int status;
+	int cmd_type;
+	u64 result;
+	// OS X uses a bidirectional DMA direction, but that's not really needed
+	dma_addr_t a = dma_map_single(&bce->pci->dev, cfg, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE);
+	if (dma_mapping_error(&bce->pci->dev, a))
+		return -ENOMEM;
+	cmd_type = is_sq ? BCE_MB_REGISTER_COMMAND_SQ : BCE_MB_REGISTER_COMMAND_CQ;
+	status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(cmd_type, a), &result);
+	dma_unmap_single(&bce->pci->dev, a, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE);
+	if (status)
+		return status;
+	if (BCE_MB_TYPE(result) != BCE_MB_REGISTER_COMMAND_QUEUE_REPLY)
+		return -EINVAL;
+	return 0;
+}
+
+static void apple_bce_remove(struct pci_dev *dev)
+{
+	struct apple_bce_device *bce = pci_get_drvdata(dev);
+	bce->is_being_removed = true;
+
+	bce_vhci_destroy(&bce->vhci);
+
+	bce_timestamp_stop(&bce->timestamp);
+#ifndef WITHOUT_NVME_PATCH
+	pci_disable_device(bce->pci0);
+#endif
+	pci_dev_put(bce->pci0);
+	pci_free_irq(dev, 0, dev);
+	pci_free_irq(dev, 4, dev);
+	bce_free_command_queues(bce);
+	pci_iounmap(dev, bce->reg_mem_mb);
+	pci_iounmap(dev, bce->reg_mem_dma);
+	device_destroy(bce_class, bce->devt);
+	pci_free_irq_vectors(dev);
+	pci_release_regions(dev);
+	pci_disable_device(dev);
+	kfree(bce);
+}
+
+static int bce_save_state_and_sleep(struct apple_bce_device *bce)
+{
+	int attempt, status = 0;
+	u64 resp;
+	dma_addr_t dma_addr;
+	void *dma_ptr = NULL;
+	size_t size = max(PAGE_SIZE, 4096UL);
+
+	for (attempt = 0; attempt < 5; ++attempt) {
+		pr_debug("apple-bce: suspend: attempt %i, buffer size %zu\n", attempt, size);
+		dma_ptr = dma_alloc_coherent(&bce->pci->dev, size, &dma_addr, GFP_KERNEL);
+		if (!dma_ptr) {
+			pr_err("apple-bce: suspend failed (data alloc failed)\n");
+			break;
+		}
+		BUG_ON((dma_addr % 4096) != 0);
+		status = bce_mailbox_send(&bce->mbox,
+			BCE_MB_MSG(BCE_MB_SAVE_STATE_AND_SLEEP, (dma_addr & ~(4096LLU - 1)) | (size / 4096)), &resp);
+		if (status) {
+			pr_err("apple-bce: suspend failed (mailbox send)\n");
+			break;
+		}
+		if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
+			bce->saved_data_dma_addr = dma_addr;
+			bce->saved_data_dma_ptr = dma_ptr;
+			bce->saved_data_dma_size = size;
+			return 0;
+		} else if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE) {
+			dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
+			/* The 0x10ff magic value was extracted from Apple's driver */
+			size = (BCE_MB_VALUE(resp) + 0x10ff) & ~(4096LLU - 1);
+			pr_debug("apple-bce: suspend: device requested a larger buffer (%zu)\n", size);
+			continue;
+		} else {
+			pr_err("apple-bce: suspend failed (invalid device response)\n");
+			status = -EINVAL;
+			break;
+		}
+	}
+	if (dma_ptr)
+		dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
+	if (!status)
+		return bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SLEEP_NO_STATE, 0), &resp);
+	return status;
+}
+
+static int bce_restore_state_and_wake(struct apple_bce_device *bce)
+{
+	int status;
+	u64 resp;
+	if (!bce->saved_data_dma_ptr) {
+		if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_NO_STATE, 0), &resp))) {
+			pr_err("apple-bce: resume with no state failed (mailbox send)\n");
+			return status;
+		}
+		if (BCE_MB_TYPE(resp) != BCE_MB_RESTORE_NO_STATE) {
+			pr_err("apple-bce: resume with no state failed (invalid device response)\n");
+			return -EINVAL;
+		}
+		return 0;
+	}
+
+	if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_STATE_AND_WAKE,
+			(bce->saved_data_dma_addr &
~(4096LLU - 1)) | (bce->saved_data_dma_size / 4096)), &resp))) {
+		pr_err("apple-bce: resume with state failed (mailbox send)\n");
+		goto finish_with_state;
+	}
+	if (BCE_MB_TYPE(resp) != BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
+		pr_err("apple-bce: resume with state failed (invalid device response)\n");
+		status = -EINVAL;
+		goto finish_with_state;
+	}
+
+finish_with_state:
+	dma_free_coherent(&bce->pci->dev, bce->saved_data_dma_size, bce->saved_data_dma_ptr, bce->saved_data_dma_addr);
+	bce->saved_data_dma_ptr = NULL;
+	return status;
+}
+
+static int apple_bce_suspend(struct device *dev)
+{
+	struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
+	int status;
+
+	bce_timestamp_stop(&bce->timestamp);
+
+	if ((status = bce_save_state_and_sleep(bce)))
+		return status;
+
+	return 0;
+}
+
+static int apple_bce_resume(struct device *dev)
+{
+	struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
+	int status;
+
+	pci_set_master(bce->pci);
+	pci_set_master(bce->pci0);
+
+	if ((status = bce_restore_state_and_wake(bce)))
+		return status;
+
+	bce_timestamp_start(&bce->timestamp, false);
+
+	return 0;
+}
+
+static struct pci_device_id apple_bce_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1801) },
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, apple_bce_ids);
+
+struct dev_pm_ops apple_bce_pci_driver_pm = {
+	.suspend = apple_bce_suspend,
+	.resume = apple_bce_resume
+};
+struct pci_driver apple_bce_pci_driver = {
+	.name = "apple-bce",
+	.id_table = apple_bce_ids,
+	.probe = apple_bce_probe,
+	.remove = apple_bce_remove,
+	.driver = {
+		.pm = &apple_bce_pci_driver_pm
+	}
+};
+
+
+static int __init apple_bce_module_init(void)
+{
+	int result;
+	if ((result = alloc_chrdev_region(&bce_chrdev, 0, 1, "apple-bce")))
+		goto fail_chrdev;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
+	bce_class = class_create(THIS_MODULE, "apple-bce");
+#else
+	bce_class = class_create("apple-bce");
+#endif
+	if (IS_ERR(bce_class)) {
+		result = PTR_ERR(bce_class);
+		goto fail_class;
+	}
+	if ((result = bce_vhci_module_init())) {
+		pr_err("apple-bce: bce-vhci init failed\n");
+		goto fail_class;
+	}
+
+	result = pci_register_driver(&apple_bce_pci_driver);
+	if (result)
+		goto fail_drv;
+
+	aaudio_module_init();
+
+	return 0;
+
+fail_drv:
+	bce_vhci_module_exit();
+fail_class:
+	class_destroy(bce_class);
+fail_chrdev:
+	unregister_chrdev_region(bce_chrdev, 1);
+	if (!result)
+		result = -EINVAL;
+	return result;
+}
+static void __exit apple_bce_module_exit(void)
+{
+	pci_unregister_driver(&apple_bce_pci_driver);
+
+	aaudio_module_exit();
+	bce_vhci_module_exit();
+	class_destroy(bce_class);
+	unregister_chrdev_region(bce_chrdev, 1);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("MrARM");
+MODULE_DESCRIPTION("Apple BCE Driver");
+MODULE_VERSION("0.01");
+module_init(apple_bce_module_init);
+module_exit(apple_bce_module_exit);
diff --git a/drivers/staging/apple-bce/apple_bce.h b/drivers/staging/apple-bce/apple_bce.h
new file mode 100644
index 000000000000..58dbeff79e43
--- /dev/null
+++ b/drivers/staging/apple-bce/apple_bce.h
@@ -0,0 +1,41 @@
+#ifndef APPLE_BCE_H
+#define APPLE_BCE_H
+
+#include
+#include
+#include "mailbox.h"
+#include "queue.h"
+#include "vhci/vhci.h"
+
+#define BC_PROTOCOL_VERSION 0x20001
+#define BCE_MAX_QUEUE_COUNT 0x100
+
+#define BCE_QUEUE_USER_MIN 2
+#define BCE_QUEUE_USER_MAX (BCE_MAX_QUEUE_COUNT - 1)
+
+struct apple_bce_device {
+	struct pci_dev *pci, *pci0;
+	dev_t devt;
+	struct device *dev;
+	void __iomem *reg_mem_mb;
+	void __iomem *reg_mem_dma;
+	struct bce_mailbox mbox;
+	struct bce_timestamp timestamp;
+	struct bce_queue *queues[BCE_MAX_QUEUE_COUNT];
+	spinlock_t queues_lock;
+	struct ida queue_ida;
+	struct bce_queue_cq *cmd_cq;
+	struct bce_queue_cmdq *cmd_cmdq;
+	struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT];
+	bool is_being_removed;
+
+	dma_addr_t saved_data_dma_addr;
+	void *saved_data_dma_ptr;
+	size_t saved_data_dma_size;
+
+	struct bce_vhci vhci;
+};
+
+extern struct apple_bce_device *global_bce;
+
+#endif //APPLE_BCE_H
diff --git a/drivers/staging/apple-bce/audio/audio.c b/drivers/staging/apple-bce/audio/audio.c
new file mode 100644
index 000000000000..bd16ddd16c1d
--- /dev/null
+++ b/drivers/staging/apple-bce/audio/audio.c
@@ -0,0 +1,711 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "audio.h"
+#include "pcm.h"
+#include
+
+static int aaudio_alsa_index = SNDRV_DEFAULT_IDX1;
+static char *aaudio_alsa_id = SNDRV_DEFAULT_STR1;
+
+static dev_t aaudio_chrdev;
+static struct class *aaudio_class;
+
+static int aaudio_init_cmd(struct aaudio_device *a);
+static int aaudio_init_bs(struct aaudio_device *a);
+static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id);
+static void aaudio_free_dev(struct aaudio_subdevice *sdev);
+
+static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct aaudio_device *aaudio = NULL;
+	struct aaudio_subdevice *sdev = NULL;
+	int status = 0;
+	u32 cfg;
+
+	pr_info("aaudio: capturing our device\n");
+
+	if (pci_enable_device(dev))
+		return -ENODEV;
+	if (pci_request_regions(dev, "aaudio")) {
+		status = -ENODEV;
+		goto fail;
+	}
+	pci_set_master(dev);
+
+	aaudio = kzalloc(sizeof(struct aaudio_device), GFP_KERNEL);
+	if (!aaudio) {
+		status = -ENOMEM;
+		goto fail;
+	}
+
+	aaudio->bce = global_bce;
+	if (!aaudio->bce) {
+		dev_warn(&dev->dev, "aaudio: No BCE available\n");
+		status = -EINVAL;
+		goto fail;
+	}
+
+	aaudio->pci = dev;
+	pci_set_drvdata(dev, aaudio);
+
+	aaudio->devt = aaudio_chrdev;
+	aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio");
+	if (IS_ERR_OR_NULL(aaudio->dev)) {
+		status = PTR_ERR(aaudio->dev);
+		goto fail;
+	}
+	device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER);
+
+	init_completion(&aaudio->remote_alive);
+	INIT_LIST_HEAD(&aaudio->subdevice_list);
+
+	/* Init: set an unknown flag in the bitset */
+	if (pci_read_config_dword(dev, 4, &cfg))
+		dev_warn(&dev->dev, "aaudio: pci_read_config_dword fail\n");
+	if (pci_write_config_dword(dev, 4, cfg | 6u))
+		dev_warn(&dev->dev, "aaudio: pci_write_config_dword fail\n");
+
+	dev_info(aaudio->dev, "aaudio: bs len = %llx\n", pci_resource_len(dev, 0));
+	aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0);
+	aaudio->reg_mem_bs = pci_iomap(dev, 0, 0);
+	aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0);
+
+	aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000);
+
+	if (IS_ERR_OR_NULL(aaudio->reg_mem_bs) || IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) {
+		dev_warn(&dev->dev, "aaudio: Failed to pci_iomap required regions\n");
+		goto fail;
+	}
+
+	if (aaudio_bce_init(aaudio)) {
+		dev_warn(&dev->dev, "aaudio: Failed to init BCE command transport\n");
+		goto fail;
+	}
+
+	if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) {
+		dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n");
+		goto fail;
+	}
+
+	strcpy(aaudio->card->shortname, "Apple T2 Audio");
+	strcpy(aaudio->card->longname, "Apple T2 Audio");
+	strcpy(aaudio->card->mixername, "Apple T2 Audio");
+	/* Dynamic alsa ids start at 100 */
+	aaudio->next_alsa_id = 100;
+
+	if (aaudio_init_cmd(aaudio)) {
+		dev_err(&dev->dev, "aaudio: Failed to initialize over BCE\n");
+		goto fail_snd;
+	}
+
+	if (aaudio_init_bs(aaudio)) {
+		dev_err(&dev->dev, "aaudio: Failed to initialize BufferStruct\n");
+		goto fail_snd;
+	}
+
+	if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
+		dev_err(&dev->dev, "Failed to set remote access\n");
+		goto fail_snd;
+	}
+
+	if (snd_card_register(aaudio->card)) {
+		dev_err(&dev->dev, "aaudio: Failed to register ALSA sound device\n");
+		goto fail_snd;
+	}
+
+	list_for_each_entry(sdev, &aaudio->subdevice_list, list) {
+		struct aaudio_buffer_struct_device *dev = &aaudio->bs->devices[sdev->buf_id];
+
+		if (sdev->out_stream_cnt == 1 && !strcmp(dev->name, "Speaker")) {
+			struct snd_pcm_hardware *hw = sdev->out_streams[0].alsa_hw_desc;
+
+			snprintf(aaudio->card->driver, sizeof(aaudio->card->driver) / sizeof(char), "AppleT2x%d", hw->channels_min);
+		}
+	}
+
+	return 0;
+
+fail_snd:
+	snd_card_free(aaudio->card);
+fail:
+	if (aaudio && aaudio->dev)
+		device_destroy(aaudio_class, aaudio->devt);
+
+	if (aaudio && !IS_ERR_OR_NULL(aaudio->reg_mem_bs))
+		pci_iounmap(dev, aaudio->reg_mem_bs);
+	if (aaudio && !IS_ERR_OR_NULL(aaudio->reg_mem_cfg))
+		pci_iounmap(dev, aaudio->reg_mem_cfg);
+	kfree(aaudio);
+
+	pci_release_regions(dev);
+	pci_disable_device(dev);
+
+	if (!status)
+		status = -EINVAL;
+	return status;
+}
+
+
+
+static void aaudio_remove(struct pci_dev *dev)
+{
+	struct aaudio_subdevice *sdev;
+	struct aaudio_device *aaudio = pci_get_drvdata(dev);
+
+	snd_card_free(aaudio->card);
+	while (!list_empty(&aaudio->subdevice_list)) {
+		sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list);
+		list_del(&sdev->list);
+		aaudio_free_dev(sdev);
+	}
+	pci_iounmap(dev, aaudio->reg_mem_bs);
+	pci_iounmap(dev, aaudio->reg_mem_cfg);
+	device_destroy(aaudio_class, aaudio->devt);
+	pci_free_irq_vectors(dev);
+	pci_release_regions(dev);
+	pci_disable_device(dev);
+	kfree(aaudio);
+}
+
+static int aaudio_suspend(struct device *dev)
+{
+	struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev));
+
+	if (aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF))
+		dev_warn(aaudio->dev, "Failed to reset remote access\n");
+
+	pci_disable_device(aaudio->pci);
+	return 0;
+}
+
+static int aaudio_resume(struct device *dev)
+{
+	int status;
+	struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev));
+
+	if ((status = pci_enable_device(aaudio->pci)))
+		return status;
+	pci_set_master(aaudio->pci);
+
+	if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
+		dev_err(aaudio->dev, "Failed to set remote access\n");
+		return status;
+	}
+
+	return 0;
+}
+
+static int aaudio_init_cmd(struct aaudio_device *a)
+{
+	int status;
+	struct aaudio_send_ctx sctx;
+	struct aaudio_msg buf;
+	u64 dev_cnt, dev_i;
+	aaudio_device_id_t *dev_l;
+
+	if ((status = aaudio_send(a, &sctx, 500,
+			aaudio_msg_write_alive_notification, 1, 3))) {
+		dev_err(a->dev, "Sending alive notification failed\n");
+		return status;
+	}
+
+	if (wait_for_completion_timeout(&a->remote_alive, msecs_to_jiffies(500)) == 0) {
+		dev_err(a->dev, "Timed out waiting for remote\n");
+		return -ETIMEDOUT;
+	}
+	dev_info(a->dev, "Continuing init\n");
+
+	buf = aaudio_reply_alloc();
+	if ((status = aaudio_cmd_get_device_list(a, &buf, &dev_l, &dev_cnt))) {
+		dev_err(a->dev, "Failed to get device list\n");
+		aaudio_reply_free(&buf);
+		return status;
+	}
+	for (dev_i = 0; dev_i < dev_cnt; ++dev_i)
+		aaudio_init_dev(a, dev_l[dev_i]);
+	aaudio_reply_free(&buf);
+
+	return 0;
+}
+
+static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm);
+static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev);
+
+static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id)
+{
+	struct aaudio_subdevice *sdev;
+	struct aaudio_msg buf = aaudio_reply_alloc();
+	u64 uid_len, stream_cnt, i;
+	aaudio_object_id_t *stream_list;
+	char *uid;
+
+	sdev = kzalloc(sizeof(struct aaudio_subdevice), GFP_KERNEL);
+
+	if (aaudio_cmd_get_property(a, &buf, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_UID, 0),
+			NULL, 0, (void **) &uid, &uid_len) || uid_len > AAUDIO_DEVICE_MAX_UID_LEN) {
+		dev_err(a->dev, "Failed to get device uid for device %llx\n", dev_id);
+		goto fail;
+	}
+	dev_info(a->dev, "Remote device %llx %.*s\n", dev_id, (int) uid_len, uid);
+
+	sdev->a = a;
+	INIT_LIST_HEAD(&sdev->list);
+	sdev->dev_id = dev_id;
+	sdev->buf_id = AAUDIO_BUFFER_ID_NONE;
+	strncpy(sdev->uid, uid, uid_len);
+	sdev->uid[uid_len] = '\0';
+
+	if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
+			AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32)))
+		dev_warn(a->dev, "Failed to query device input latency\n");
+	if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
+			AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->out_latency, sizeof(u32)))
+		dev_warn(a->dev, "Failed to query device output latency\n");
+
+	if (aaudio_cmd_get_input_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
+		dev_err(a->dev, "Failed to get input stream list for device %llx\n", dev_id);
+		goto fail;
+	}
+	if (stream_cnt > AAUDIO_DEIVCE_MAX_INPUT_STREAMS) {
+		dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n",
+				sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_INPUT_STREAMS);
+		stream_cnt = AAUDIO_DEIVCE_MAX_INPUT_STREAMS;
+	}
+	sdev->in_stream_cnt = stream_cnt;
+	for (i = 0; i < stream_cnt; i++) {
+		sdev->in_streams[i].id = stream_list[i];
+		sdev->in_streams[i].buffer_cnt = 0;
+		aaudio_init_stream_info(sdev, &sdev->in_streams[i]);
+		sdev->in_streams[i].latency += sdev->in_latency;
+	}
+
+	if (aaudio_cmd_get_output_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
+		dev_err(a->dev, "Failed to get output stream list for device %llx\n", dev_id);
+		goto fail;
+	}
+	if (stream_cnt > AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS) {
+		dev_warn(a->dev, "Device %s output stream count %llu is larger than the supported count of %u\n",
+				sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS);
+		stream_cnt = AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS;
+	}
+	sdev->out_stream_cnt = stream_cnt;
+	for (i = 0; i < stream_cnt; i++) {
+		sdev->out_streams[i].id = stream_list[i];
+		sdev->out_streams[i].buffer_cnt = 0;
+		aaudio_init_stream_info(sdev, &sdev->out_streams[i]);
+		sdev->out_streams[i].latency += sdev->out_latency;
+	}
+
+	if (sdev->is_pcm)
+		aaudio_create_pcm(sdev);
+	/* Headphone Jack status */
+	if (!strcmp(sdev->uid, "Codec Output")) {
+		if (snd_jack_new(a->card, sdev->uid, SND_JACK_HEADPHONE, &sdev->jack, true, false))
+			dev_warn(a->dev, "Failed to create an attached jack for %s\n", sdev->uid);
+		aaudio_cmd_property_listener(a, sdev->dev_id, sdev->dev_id,
+				AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0));
+		aaudio_handle_jack_connection_change(sdev);
+	}
+
+
+    aaudio_reply_free(&buf);
+
+    list_add_tail(&sdev->list, &a->subdevice_list);
+    return;
+
+fail:
+    aaudio_reply_free(&buf);
+    kfree(sdev);
+}
+
+static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm)
+{
+    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_PHYS_FORMAT, 0), NULL, 0,
+            &strm->desc, sizeof(strm->desc)))
+        dev_warn(sdev->a->dev, "Failed to query stream descriptor\n");
+    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_LATENCY, 0), NULL, 0, &strm->latency, sizeof(u32)))
+        dev_warn(sdev->a->dev, "Failed to query stream latency\n");
+    if (strm->desc.format_id == AAUDIO_FORMAT_LPCM)
+        sdev->is_pcm = true;
+}
+
+static void aaudio_free_dev(struct aaudio_subdevice *sdev)
+{
+    size_t i;
+
+    for (i = 0; i < sdev->in_stream_cnt; i++) {
+        kfree(sdev->in_streams[i].alsa_hw_desc);
+        kfree(sdev->in_streams[i].buffers);
+    }
+    for (i = 0; i < sdev->out_stream_cnt; i++) {
+        kfree(sdev->out_streams[i].alsa_hw_desc);
+        kfree(sdev->out_streams[i].buffers);
+    }
+    kfree(sdev);
+}
+
+static struct aaudio_subdevice *aaudio_find_dev_by_dev_id(struct aaudio_device *a, aaudio_device_id_t dev_id)
+{
+    struct aaudio_subdevice *sdev;
+
+    list_for_each_entry(sdev, &a->subdevice_list, list) {
+        if (dev_id == sdev->dev_id)
+            return sdev;
+    }
+    return NULL;
+}
+
+static struct aaudio_subdevice *aaudio_find_dev_by_uid(struct aaudio_device *a, const char *uid)
+{
+    struct aaudio_subdevice *sdev;
+
+    list_for_each_entry(sdev, &a->subdevice_list, list) {
+        if (!strcmp(uid, sdev->uid))
+            return sdev;
+    }
+    return NULL;
+}
+
+static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
+        struct aaudio_buffer_struct_stream *bs_strm);
+static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
+        struct aaudio_buffer_struct_stream *bs_strm);
+
+static int aaudio_init_bs(struct aaudio_device *a)
+{
+    int i, j;
+    struct aaudio_buffer_struct_device *dev;
+    struct aaudio_subdevice *sdev;
+    u32 ver, sig, bs_base;
+
+    ver = ioread32(&a->reg_mem_gpr[0]);
+    if (ver < 3) {
+        dev_err(a->dev, "aaudio: Bad GPR version (%u)\n", ver);
+        return -EINVAL;
+    }
+    sig = ioread32(&a->reg_mem_gpr[1]);
+    if (sig != AAUDIO_SIG) {
+        dev_err(a->dev, "aaudio: Bad GPR sig (%x)\n", sig);
+        return -EINVAL;
+    }
+    bs_base = ioread32(&a->reg_mem_gpr[2]);
+    a->bs = (struct aaudio_buffer_struct *) ((u8 *) a->reg_mem_bs + bs_base);
+    if (a->bs->signature != AAUDIO_SIG) {
+        dev_err(a->dev, "aaudio: Bad BufferStruct sig (%x)\n", a->bs->signature);
+        return -EINVAL;
+    }
+    dev_info(a->dev, "aaudio: BufferStruct ver = %i\n", a->bs->version);
+    dev_info(a->dev, "aaudio: Num devices = %i\n", a->bs->num_devices);
+    for (i = 0; i < a->bs->num_devices; i++) {
+        dev = &a->bs->devices[i];
+        dev_info(a->dev, "aaudio: Device %i %s\n", i, dev->name);
+
+        sdev = aaudio_find_dev_by_uid(a, dev->name);
+        if (!sdev) {
+            dev_err(a->dev, "aaudio: Subdevice not found for BufferStruct device %s\n", dev->name);
+            continue;
+        }
+        sdev->buf_id = (u8) i;
+        dev->num_input_streams = 0;
+        for (j = 0; j < dev->num_output_streams; j++) {
+            dev_info(a->dev, "aaudio: Device %i Stream %i: Output; Buffer Count = %i\n", i, j,
+                    dev->output_streams[j].num_buffers);
+            if (j < sdev->out_stream_cnt)
+                aaudio_init_bs_stream(a, &sdev->out_streams[j], &dev->output_streams[j]);
+        }
+    }
+
+    list_for_each_entry(sdev, &a->subdevice_list, list) {
+        if (sdev->buf_id != AAUDIO_BUFFER_ID_NONE)
+            continue;
+        sdev->buf_id = i;
+        dev_info(a->dev, "aaudio: Created device %i %s\n", i, sdev->uid);
+        strscpy(a->bs->devices[i].name, sdev->uid, sizeof(a->bs->devices[i].name));
+        a->bs->devices[i].num_input_streams = 0;
+        a->bs->devices[i].num_output_streams = 0;
+        a->bs->num_devices = ++i;
+    }
+    list_for_each_entry(sdev, &a->subdevice_list, list) {
+        if (sdev->in_stream_cnt == 1) {
+            dev_info(a->dev, "aaudio: Device %i Host Stream; Input\n", sdev->buf_id);
+            aaudio_init_bs_stream_host(a, &sdev->in_streams[0], &a->bs->devices[sdev->buf_id].input_streams[0]);
+            a->bs->devices[sdev->buf_id].num_input_streams = 1;
+            /* Publish the BufferStruct updates before telling the remote about them */
+            wmb();
+
+            if (aaudio_cmd_set_input_stream_address_ranges(a, sdev->dev_id))
+                dev_err(a->dev, "aaudio: Failed to set input stream address ranges\n");
+        }
+    }
+
+    return 0;
+}
+
+static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
+        struct aaudio_buffer_struct_stream *bs_strm)
+{
+    size_t i;
+
+    strm->buffer_cnt = bs_strm->num_buffers;
+    if (bs_strm->num_buffers > AAUDIO_DEIVCE_MAX_BUFFER_COUNT) {
+        dev_warn(a->dev, "BufferStruct buffer count %u exceeds driver limit of %u\n", bs_strm->num_buffers,
+                AAUDIO_DEIVCE_MAX_BUFFER_COUNT);
+        strm->buffer_cnt = AAUDIO_DEIVCE_MAX_BUFFER_COUNT;
+    }
+    if (!strm->buffer_cnt)
+        return;
+    strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
+    if (!strm->buffers) {
+        dev_err(a->dev, "Buffer list allocation failed\n");
+        return;
+    }
+    for (i = 0; i < strm->buffer_cnt; i++) {
+        strm->buffers[i].dma_addr = a->reg_mem_bs_dma + (dma_addr_t) bs_strm->buffers[i].address;
+        strm->buffers[i].ptr = a->reg_mem_bs + bs_strm->buffers[i].address;
+        strm->buffers[i].size = bs_strm->buffers[i].size;
+    }
+
+    if (strm->buffer_cnt == 1) {
+        strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
+        if (strm->alsa_hw_desc &&
+            aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
+            kfree(strm->alsa_hw_desc);
+            strm->alsa_hw_desc = NULL;
+        }
+    }
+}
+
+static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
+        struct aaudio_buffer_struct_stream *bs_strm)
+{
+    size_t size;
+    dma_addr_t dma_addr;
+    void *dma_ptr;
+
+    /* Allocate host-side backing store for 16640 packets */
+    size = strm->desc.bytes_per_packet * 16640;
+    dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL);
+    if (!dma_ptr) {
+        dev_err(a->dev, "dma_alloc_coherent failed\n");
+        return;
+    }
+    bs_strm->buffers[0].address = dma_addr;
+    bs_strm->buffers[0].size = size;
+    bs_strm->num_buffers = 1;
+
+    memset(dma_ptr, 0, size);
+
+    strm->buffer_cnt = 1;
+    strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
+    if (!strm->buffers) {
+        dev_err(a->dev, "Buffer list allocation failed\n");
+        return;
+    }
+    strm->buffers[0].dma_addr = dma_addr;
+    strm->buffers[0].ptr = dma_ptr;
+    strm->buffers[0].size = size;
+
+    strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
+    if (strm->alsa_hw_desc &&
+        aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
+        kfree(strm->alsa_hw_desc);
+        strm->alsa_hw_desc = NULL;
+    }
+}
+
+static void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg);
+
+void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg)
+{
+    struct aaudio_send_ctx sctx;
+    struct aaudio_msg_base base;
+
+    if (aaudio_msg_read_base(msg, &base))
+        return;
+    switch (base.msg) {
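+    /* Dispatch on the message id parsed out of the payload: BOOT means the
+     * remote controller restarted and the alive handshake must be redone,
+     * ALIVE completes the handshake aaudio_init_cmd() is waiting on, and
+     * property changes are bounced to a workqueue (see below). */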
+ case AAUDIO_MSG_NOTIFICATION_BOOT: + dev_info(a->dev, "Received boot notification from remote\n"); + + /* Resend the alive notify */ + if (aaudio_send(a, &sctx, 500, + aaudio_msg_write_alive_notification, 1, 3)) { + pr_err("Sending alive notification failed\n"); + } + break; + case AAUDIO_MSG_NOTIFICATION_ALIVE: + dev_info(a->dev, "Received alive notification from remote\n"); + complete_all(&a->remote_alive); + break; + case AAUDIO_MSG_PROPERTY_CHANGED: + aaudio_handle_prop_change(a, msg); + break; + default: + dev_info(a->dev, "Unhandled notification %i", base.msg); + break; + } +} + +struct aaudio_prop_change_work_struct { + struct work_struct ws; + struct aaudio_device *a; + aaudio_device_id_t dev; + aaudio_object_id_t obj; + struct aaudio_prop_addr prop; +}; + +static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev) +{ + u32 plugged; + if (!sdev->jack) + return; + /* NOTE: Apple made the plug status scoped to the input and output streams. This makes no sense for us, so I just + * always pick the OUTPUT status. */ + if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, sdev->dev_id, + AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0), NULL, 0, &plugged, sizeof(plugged))) { + dev_err(sdev->a->dev, "Failed to get jack enable status\n"); + return; + } + dev_dbg(sdev->a->dev, "Jack is now %s\n", plugged ? "plugged" : "unplugged"); + snd_jack_report(sdev->jack, plugged ? sdev->jack->type : 0); +} + +void aaudio_handle_prop_change_work(struct work_struct *ws) +{ + struct aaudio_prop_change_work_struct *work = container_of(ws, struct aaudio_prop_change_work_struct, ws); + struct aaudio_subdevice *sdev; + + sdev = aaudio_find_dev_by_dev_id(work->a, work->dev); + if (!sdev) { + dev_err(work->a->dev, "Property notification change: device not found\n"); + goto done; + } + dev_dbg(work->a->dev, "Property changed for device: %s\n", sdev->uid); + + if (work->prop.scope == AAUDIO_PROP_SCOPE_OUTPUT && work->prop.selector == AAUDIO_PROP_JACK_PLUGGED) { + aaudio_handle_jack_connection_change(sdev); + } + +done: + kfree(work); +} + +void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg) +{ + /* NOTE: This is a scheduled work because this callback will generally need to query device information and this + * is not possible when we are in the reply parsing code's context. */ + struct aaudio_prop_change_work_struct *work; + work = kmalloc(sizeof(struct aaudio_prop_change_work_struct), GFP_KERNEL); + work->a = a; + INIT_WORK(&work->ws, aaudio_handle_prop_change_work); + aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop); + schedule_work(&work->ws); +} + +#define aaudio_send_cmd_response(a, sctx, msg, fn, ...) 
\ + if (aaudio_send_with_tag(a, sctx, ((struct aaudio_msg_header *) msg->data)->tag, 500, fn, ##__VA_ARGS__)) \ + pr_err("aaudio: Failed to reply to a command\n"); + +void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg) +{ + ktime_t time_os = ktime_get_boottime(); + struct aaudio_send_ctx sctx; + struct aaudio_subdevice *sdev; + u64 devid, timestamp, update_seed; + aaudio_msg_read_update_timestamp(msg, &devid, ×tamp, &update_seed); + dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed); + + sdev = aaudio_find_dev_by_dev_id(a, devid); + aaudio_handle_timestamp(sdev, time_os, timestamp); + + aaudio_send_cmd_response(a, &sctx, msg, + aaudio_msg_write_update_timestamp_response); +} + +void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg) +{ + struct aaudio_msg_base base; + if (aaudio_msg_read_base(msg, &base)) + return; + switch (base.msg) { + case AAUDIO_MSG_UPDATE_TIMESTAMP: + aaudio_handle_cmd_timestamp(a, msg); + break; + default: + dev_info(a->dev, "Unhandled device command %i", base.msg); + break; + } +} + +static struct pci_device_id aaudio_ids[ ] = { + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1803) }, + { 0, }, +}; + +struct dev_pm_ops aaudio_pci_driver_pm = { + .suspend = aaudio_suspend, + .resume = aaudio_resume +}; +struct pci_driver aaudio_pci_driver = { + .name = "aaudio", + .id_table = aaudio_ids, + .probe = aaudio_probe, + .remove = aaudio_remove, + .driver = { + .pm = &aaudio_pci_driver_pm + } +}; + + +int aaudio_module_init(void) +{ + int result; + if ((result = alloc_chrdev_region(&aaudio_chrdev, 0, 1, "aaudio"))) + goto fail_chrdev; +#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) + aaudio_class = class_create(THIS_MODULE, "aaudio"); +#else + aaudio_class = class_create("aaudio"); +#endif + if (IS_ERR(aaudio_class)) { + result = PTR_ERR(aaudio_class); + goto fail_class; + } + + result = pci_register_driver(&aaudio_pci_driver); + if (result) + goto fail_drv; + return 0; + +fail_drv: + pci_unregister_driver(&aaudio_pci_driver); +fail_class: + class_destroy(aaudio_class); +fail_chrdev: + unregister_chrdev_region(aaudio_chrdev, 1); + if (!result) + result = -EINVAL; + return result; +} + +void aaudio_module_exit(void) +{ + pci_unregister_driver(&aaudio_pci_driver); + class_destroy(aaudio_class); + unregister_chrdev_region(aaudio_chrdev, 1); +} + +struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[] = { + {"Speaker", 0}, + {"Digital Mic", 1}, + {"Codec Output", 2}, + {"Codec Input", 3}, + {"Bridge Loopback", 4}, + {} +}; + +module_param_named(index, aaudio_alsa_index, int, 0444); +MODULE_PARM_DESC(index, "Index value for Apple Internal Audio soundcard."); +module_param_named(id, aaudio_alsa_id, charp, 0444); +MODULE_PARM_DESC(id, "ID string for Apple Internal Audio soundcard."); diff --git a/drivers/staging/apple-bce/audio/audio.h b/drivers/staging/apple-bce/audio/audio.h new file mode 100644 index 000000000000..004bc1e22ea4 --- /dev/null +++ b/drivers/staging/apple-bce/audio/audio.h @@ -0,0 +1,125 @@ +#ifndef AAUDIO_H +#define AAUDIO_H + +#include +#include +#include "../apple_bce.h" +#include "protocol_bce.h" +#include "description.h" + +#define AAUDIO_SIG 0x19870423 + +#define AAUDIO_DEVICE_MAX_UID_LEN 128 +#define AAUDIO_DEIVCE_MAX_INPUT_STREAMS 1 +#define AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS 1 +#define AAUDIO_DEIVCE_MAX_BUFFER_COUNT 1 + +#define AAUDIO_BUFFER_ID_NONE 0xffu + +struct snd_card; +struct snd_pcm; +struct snd_pcm_hardware; +struct snd_jack; + +struct 
__attribute__((packed)) __attribute__((aligned(4))) aaudio_buffer_struct_buffer { + size_t address; + size_t size; + size_t pad[4]; +}; +struct aaudio_buffer_struct_stream { + u8 num_buffers; + struct aaudio_buffer_struct_buffer buffers[100]; + char filler[32]; +}; +struct aaudio_buffer_struct_device { + char name[128]; + u8 num_input_streams; + u8 num_output_streams; + struct aaudio_buffer_struct_stream input_streams[5]; + struct aaudio_buffer_struct_stream output_streams[5]; + char filler[128]; +}; +struct aaudio_buffer_struct { + u32 version; + u32 signature; + u32 flags; + u8 num_devices; + struct aaudio_buffer_struct_device devices[20]; +}; + +struct aaudio_device; +struct aaudio_dma_buf { + dma_addr_t dma_addr; + void *ptr; + size_t size; +}; +struct aaudio_stream { + aaudio_object_id_t id; + size_t buffer_cnt; + struct aaudio_dma_buf *buffers; + + struct aaudio_apple_description desc; + struct snd_pcm_hardware *alsa_hw_desc; + u32 latency; + + bool waiting_for_first_ts; + + ktime_t remote_timestamp; + snd_pcm_sframes_t frame_min; + int started; +}; +struct aaudio_subdevice { + struct aaudio_device *a; + struct list_head list; + aaudio_device_id_t dev_id; + u32 in_latency, out_latency; + u8 buf_id; + int alsa_id; + char uid[AAUDIO_DEVICE_MAX_UID_LEN + 1]; + size_t in_stream_cnt; + struct aaudio_stream in_streams[AAUDIO_DEIVCE_MAX_INPUT_STREAMS]; + size_t out_stream_cnt; + struct aaudio_stream out_streams[AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS]; + bool is_pcm; + struct snd_pcm *pcm; + struct snd_jack *jack; +}; +struct aaudio_alsa_pcm_id_mapping { + const char *name; + int alsa_id; +}; + +struct aaudio_device { + struct pci_dev *pci; + dev_t devt; + struct device *dev; + void __iomem *reg_mem_bs; + dma_addr_t reg_mem_bs_dma; + void __iomem *reg_mem_cfg; + + u32 __iomem *reg_mem_gpr; + + struct aaudio_buffer_struct *bs; + + struct apple_bce_device *bce; + struct aaudio_bce bcem; + + struct snd_card *card; + + struct list_head subdevice_list; + int next_alsa_id; + + struct completion remote_alive; +}; + +void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg); +void aaudio_handle_prop_change_work(struct work_struct *ws); +void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg); +void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg); + +int aaudio_module_init(void); +void aaudio_module_exit(void); + +extern struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[]; + +#endif //AAUDIO_H diff --git a/drivers/staging/apple-bce/audio/description.h b/drivers/staging/apple-bce/audio/description.h new file mode 100644 index 000000000000..dfef3ab68f27 --- /dev/null +++ b/drivers/staging/apple-bce/audio/description.h @@ -0,0 +1,42 @@ +#ifndef AAUDIO_DESCRIPTION_H +#define AAUDIO_DESCRIPTION_H + +#include + +struct aaudio_apple_description { + u64 sample_rate_double; + u32 format_id; + u32 format_flags; + u32 bytes_per_packet; + u32 frames_per_packet; + u32 bytes_per_frame; + u32 channels_per_frame; + u32 bits_per_channel; + u32 reserved; +}; + +enum { + AAUDIO_FORMAT_LPCM = 0x6c70636d // 'lpcm' +}; + +enum { + AAUDIO_FORMAT_FLAG_FLOAT = 1, + AAUDIO_FORMAT_FLAG_BIG_ENDIAN = 2, + AAUDIO_FORMAT_FLAG_SIGNED = 4, + AAUDIO_FORMAT_FLAG_PACKED = 8, + AAUDIO_FORMAT_FLAG_ALIGNED_HIGH = 16, + AAUDIO_FORMAT_FLAG_NON_INTERLEAVED = 32, + AAUDIO_FORMAT_FLAG_NON_MIXABLE = 64 +}; + +static inline u64 aaudio_double_to_u64(u64 d) +{ + u8 sign = (u8) ((d >> 63) & 1); + s32 exp = (s32) ((d >> 52) & 0x7ff) - 1023; + u64 fr = d & ((1LL << 52) - 1); + 
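+    /* The firmware reports sample rates as IEEE 754 doubles. Negative or
+     * sub-one values cannot be valid rates, so they collapse to zero;
+     * otherwise the integer part is rebuilt from the implicit leading one
+     * (1 << exp) plus the top exp bits of the mantissa. */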
if (sign || exp < 0) + return 0; + return (u64) ((1LL << exp) + (fr >> (52 - exp))); +} + +#endif //AAUDIO_DESCRIPTION_H diff --git a/drivers/staging/apple-bce/audio/pcm.c b/drivers/staging/apple-bce/audio/pcm.c new file mode 100644 index 000000000000..1026e10a9ac5 --- /dev/null +++ b/drivers/staging/apple-bce/audio/pcm.c @@ -0,0 +1,308 @@ +#include "pcm.h" +#include "audio.h" + +static u64 aaudio_get_alsa_fmtbit(struct aaudio_apple_description *desc) +{ + if (desc->format_flags & AAUDIO_FORMAT_FLAG_FLOAT) { + if (desc->bits_per_channel == 32) { + if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) + return SNDRV_PCM_FMTBIT_FLOAT_BE; + else + return SNDRV_PCM_FMTBIT_FLOAT_LE; + } else if (desc->bits_per_channel == 64) { + if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) + return SNDRV_PCM_FMTBIT_FLOAT64_BE; + else + return SNDRV_PCM_FMTBIT_FLOAT64_LE; + } else { + pr_err("aaudio: unsupported bits per channel for float format: %u\n", desc->bits_per_channel); + return 0; + } + } +#define DEFINE_BPC_OPTION(val, b) \ + case val: \ + if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) { \ + if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \ + return SNDRV_PCM_FMTBIT_S ## b ## BE; \ + else \ + return SNDRV_PCM_FMTBIT_U ## b ## BE; \ + } else { \ + if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \ + return SNDRV_PCM_FMTBIT_S ## b ## LE; \ + else \ + return SNDRV_PCM_FMTBIT_U ## b ## LE; \ + } + if (desc->format_flags & AAUDIO_FORMAT_FLAG_PACKED) { + switch (desc->bits_per_channel) { + case 8: + case 16: + case 32: + break; + DEFINE_BPC_OPTION(24, 24_3) + default: + pr_err("aaudio: unsupported bits per channel for packed format: %u\n", desc->bits_per_channel); + return 0; + } + } + if (desc->format_flags & AAUDIO_FORMAT_FLAG_ALIGNED_HIGH) { + switch (desc->bits_per_channel) { + DEFINE_BPC_OPTION(24, 32_) + default: + pr_err("aaudio: unsupported bits per channel for high-aligned format: %u\n", desc->bits_per_channel); + return 0; + } + } + switch (desc->bits_per_channel) { + case 8: + if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) + return SNDRV_PCM_FMTBIT_S8; + else + return SNDRV_PCM_FMTBIT_U8; + DEFINE_BPC_OPTION(16, 16_) + DEFINE_BPC_OPTION(24, 24_) + DEFINE_BPC_OPTION(32, 32_) + default: + pr_err("aaudio: unsupported bits per channel: %u\n", desc->bits_per_channel); + return 0; + } +} +int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, + size_t buf_size) +{ + uint rate; + alsa_hw->info = (SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_DOUBLE); + if (desc->format_flags & AAUDIO_FORMAT_FLAG_NON_MIXABLE) + pr_warn("aaudio: unsupported hw flag: NON_MIXABLE\n"); + if (!(desc->format_flags & AAUDIO_FORMAT_FLAG_NON_INTERLEAVED)) + alsa_hw->info |= SNDRV_PCM_INFO_INTERLEAVED; + alsa_hw->formats = aaudio_get_alsa_fmtbit(desc); + if (!alsa_hw->formats) + return -EINVAL; + rate = (uint) aaudio_double_to_u64(desc->sample_rate_double); + alsa_hw->rates = snd_pcm_rate_to_rate_bit(rate); + alsa_hw->rate_min = rate; + alsa_hw->rate_max = rate; + alsa_hw->channels_min = desc->channels_per_frame; + alsa_hw->channels_max = desc->channels_per_frame; + alsa_hw->buffer_bytes_max = buf_size; + alsa_hw->period_bytes_min = desc->bytes_per_packet; + alsa_hw->period_bytes_max = desc->bytes_per_packet; + alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet); + alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet); + pr_debug("aaudio_create_hw_info: format = %llu, rate 
= %u/%u. channels = %u, periods = %u, period size = %lu\n",
+            alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min,
+            alsa_hw->period_bytes_min);
+    return 0;
+}
+
+static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substream)
+{
+    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
+
+    if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+        return &sdev->out_streams[substream->number];
+    else
+        return &sdev->in_streams[substream->number];
+}
+
+static int aaudio_pcm_open(struct snd_pcm_substream *substream)
+{
+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
+
+    pr_debug("aaudio_pcm_open\n");
+    if (!stream->alsa_hw_desc)
+        return -ENXIO;
+    substream->runtime->hw = *stream->alsa_hw_desc;
+
+    return 0;
+}
+
+static int aaudio_pcm_close(struct snd_pcm_substream *substream)
+{
+    pr_debug("aaudio_pcm_close\n");
+    return 0;
+}
+
+static int aaudio_pcm_prepare(struct snd_pcm_substream *substream)
+{
+    return 0;
+}
+
+static int aaudio_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params)
+{
+    struct aaudio_stream *astream = aaudio_pcm_stream(substream);
+
+    pr_debug("aaudio_pcm_hw_params\n");
+
+    if (!astream->buffer_cnt || !astream->buffers)
+        return -EINVAL;
+
+    substream->runtime->dma_area = astream->buffers[0].ptr;
+    substream->runtime->dma_addr = astream->buffers[0].dma_addr;
+    substream->runtime->dma_bytes = astream->buffers[0].size;
+    return 0;
+}
+
+static int aaudio_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+    pr_debug("aaudio_pcm_hw_free\n");
+    return 0;
+}
+
+static void aaudio_pcm_start(struct snd_pcm_substream *substream)
+{
+    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
+    void *buf = NULL;
+    size_t s = 0;
+    ktime_t time_start, time_end;
+    bool back_buffer;
+
+    time_start = ktime_get();
+
+    back_buffer = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+
+    if (back_buffer) {
+        /* Back up any audio the application has already written, since
+         * starting the remote side may clobber the shared buffer. */
+        s = frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr);
+        buf = kmalloc(s, GFP_KERNEL);
+        if (!buf) {
+            back_buffer = false;
+        } else {
+            memcpy_fromio(buf, substream->runtime->dma_area, s);
+            time_end = ktime_get();
+            pr_debug("aaudio: Backed up the buffer in %lluns [%li]\n", ktime_to_ns(time_end - time_start),
+                    substream->runtime->control->appl_ptr);
+        }
+    }
+
+    stream->waiting_for_first_ts = true;
+    stream->frame_min = stream->latency;
+
+    aaudio_cmd_start_io(sdev->a, sdev->dev_id);
+    if (back_buffer) {
+        memcpy_toio(substream->runtime->dma_area, buf, s);
+        kfree(buf);
+    }
+
+    time_end = ktime_get();
+    pr_debug("aaudio: Started the audio device in %lluns\n", ktime_to_ns(time_end - time_start));
+}
+
+static int aaudio_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
+
+    pr_debug("aaudio_pcm_trigger %x\n", cmd);
+
+    /* We only support triggers on the #0 substream */
+    if (substream->number != 0)
+        return 0;
+    switch (cmd) {
+    case SNDRV_PCM_TRIGGER_START:
+        aaudio_pcm_start(substream);
+        stream->started = 1;
+        break;
+    case SNDRV_PCM_TRIGGER_STOP:
+        aaudio_cmd_stop_io(sdev->a, sdev->dev_id);
+        stream->started = 0;
+        break;
+    default:
+        return -EINVAL;
+    }
+    return 0;
+}
+
+static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream)
+{
+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
+    ktime_t time_from_start;
+    snd_pcm_sframes_t frames;
+    snd_pcm_sframes_t buffer_time_length;
+
+    if (!stream->started || stream->waiting_for_first_ts) {
+        pr_warn("aaudio_pcm_pointer while not started\n");
+        return 0;
+    }
+
+    /* Approximate the pointer based on the last received timestamp */
+    time_from_start = ktime_get_boottime() - stream->remote_timestamp;
+    buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate;
+    frames = (ktime_to_ns(time_from_start) % buffer_time_length) * substream->runtime->buffer_size / buffer_time_length;
+    if (ktime_to_ns(time_from_start) < buffer_time_length) {
+        if (frames < stream->frame_min)
+            frames = stream->frame_min;
+        else
+            stream->frame_min = 0;
+    } else {
+        if (ktime_to_ns(time_from_start) < 2 * buffer_time_length)
+            stream->frame_min = frames;
+        else
+            stream->frame_min = 0; /* Heavy desync */
+    }
+    frames -= stream->latency;
+    if (frames < 0)
+        frames += ((-frames - 1) / substream->runtime->buffer_size + 1) * substream->runtime->buffer_size;
+    return (snd_pcm_uframes_t) frames;
+}
+
+static const struct snd_pcm_ops aaudio_pcm_ops = {
+    .open = aaudio_pcm_open,
+    .close = aaudio_pcm_close,
+    .ioctl = snd_pcm_lib_ioctl,
+    .hw_params = aaudio_pcm_hw_params,
+    .hw_free = aaudio_pcm_hw_free,
+    .prepare = aaudio_pcm_prepare,
+    .trigger = aaudio_pcm_trigger,
+    .pointer = aaudio_pcm_pointer,
+    .mmap = snd_pcm_lib_mmap_iomem
+};
+
+int aaudio_create_pcm(struct aaudio_subdevice *sdev)
+{
+    struct snd_pcm *pcm;
+    struct aaudio_alsa_pcm_id_mapping *id_mapping;
+    int err;
+
+    if (!sdev->is_pcm || (sdev->in_stream_cnt == 0 && sdev->out_stream_cnt == 0))
+        return -EINVAL;
+
+    for (id_mapping = aaudio_alsa_id_mappings; id_mapping->name; id_mapping++) {
+        if (!strcmp(sdev->uid, id_mapping->name)) {
+            sdev->alsa_id = id_mapping->alsa_id;
+            break;
+        }
+    }
+    if (!id_mapping->name)
+        sdev->alsa_id = sdev->a->next_alsa_id++;
+    err = snd_pcm_new(sdev->a->card, sdev->uid, sdev->alsa_id,
+            (int) sdev->out_stream_cnt, (int) sdev->in_stream_cnt, &pcm);
+    if (err < 0)
+        return err;
+    pcm->private_data = sdev;
+    pcm->nonatomic = 1;
+    sdev->pcm = pcm;
+    strscpy(pcm->name, sdev->uid, sizeof(pcm->name));
+    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &aaudio_pcm_ops);
+    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &aaudio_pcm_ops);
+    return 0;
+}
+
+static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, ktime_t timestamp)
+{
+    unsigned long flags;
+    struct aaudio_stream *stream;
+
+    stream = aaudio_pcm_stream(substream);
+    snd_pcm_stream_lock_irqsave(substream, flags);
+    stream->remote_timestamp = timestamp;
+    if (stream->waiting_for_first_ts) {
+        stream->waiting_for_first_ts = false;
+        snd_pcm_stream_unlock_irqrestore(substream, flags);
+        return;
+    }
+    snd_pcm_stream_unlock_irqrestore(substream, flags);
+    snd_pcm_period_elapsed(substream);
+}
+
+void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp)
+{
+    struct snd_pcm_substream *substream;
+
+    substream = sdev->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
+    if (substream)
+        aaudio_handle_stream_timestamp(substream, dev_timestamp);
+    substream = sdev->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
+    if (substream)
+        aaudio_handle_stream_timestamp(substream, os_timestamp);
+}
diff --git a/drivers/staging/apple-bce/audio/pcm.h b/drivers/staging/apple-bce/audio/pcm.h
new file mode 100644
index 000000000000..ea5f35fbe408
--- /dev/null
+++ b/drivers/staging/apple-bce/audio/pcm.h
@@ -0,0 +1,16 @@
+#ifndef AAUDIO_PCM_H
+#define AAUDIO_PCM_H
+
+#include <linux/types.h>
+#include <linux/ktime.h>
+
+struct aaudio_subdevice;
+struct aaudio_apple_description;
+struct snd_pcm_hardware;
+
+int aaudio_create_hw_info(struct aaudio_apple_description *desc,
struct snd_pcm_hardware *alsa_hw, size_t buf_size); +int aaudio_create_pcm(struct aaudio_subdevice *sdev); + +void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp); + +#endif //AAUDIO_PCM_H diff --git a/drivers/staging/apple-bce/audio/protocol.c b/drivers/staging/apple-bce/audio/protocol.c new file mode 100644 index 000000000000..2314813aeead --- /dev/null +++ b/drivers/staging/apple-bce/audio/protocol.c @@ -0,0 +1,347 @@ +#include "protocol.h" +#include "protocol_bce.h" +#include "audio.h" + +int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base) +{ + if (msg->size < sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base) * 2) + return -EINVAL; + *base = *((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1)); + return 0; +} + +#define READ_START(type) \ + size_t offset = sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base); (void)offset; \ + if (((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1))->msg != type) \ + return -EINVAL; +#define READ_DEVID_VAR(devid) *devid = ((struct aaudio_msg_header *) msg->data)->device_id +#define READ_VAL(type) ({ offset += sizeof(type); *((type *) ((u8 *) msg->data + offset - sizeof(type))); }) +#define READ_VAR(type, var) *var = READ_VAL(type) + +int aaudio_msg_read_start_io_response(struct aaudio_msg *msg) +{ + READ_START(AAUDIO_MSG_START_IO_RESPONSE); + return 0; +} + +int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg) +{ + READ_START(AAUDIO_MSG_STOP_IO_RESPONSE); + return 0; +} + +int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid, + u64 *timestamp, u64 *update_seed) +{ + READ_START(AAUDIO_MSG_UPDATE_TIMESTAMP); + READ_DEVID_VAR(devid); + READ_VAR(u64, timestamp); + READ_VAR(u64, update_seed); + return 0; +} + +int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, + struct aaudio_prop_addr *prop, void **data, u64 *data_size) +{ + READ_START(AAUDIO_MSG_GET_PROPERTY_RESPONSE); + READ_VAR(aaudio_object_id_t, obj); + READ_VAR(u32, &prop->element); + READ_VAR(u32, &prop->scope); + READ_VAR(u32, &prop->selector); + READ_VAR(u64, data_size); + *data = ((u8 *) msg->data + offset); + /* offset += data_size; */ + return 0; +} + +int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj) +{ + READ_START(AAUDIO_MSG_SET_PROPERTY_RESPONSE); + READ_VAR(aaudio_object_id_t, obj); + return 0; +} + +int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, + struct aaudio_prop_addr *prop) +{ + READ_START(AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE); + READ_VAR(aaudio_object_id_t, obj); + READ_VAR(u32, &prop->element); + READ_VAR(u32, &prop->scope); + READ_VAR(u32, &prop->selector); + return 0; +} + +int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj, + struct aaudio_prop_addr *prop) +{ + READ_START(AAUDIO_MSG_PROPERTY_CHANGED); + READ_DEVID_VAR(devid); + READ_VAR(aaudio_object_id_t, obj); + READ_VAR(u32, &prop->element); + READ_VAR(u32, &prop->scope); + READ_VAR(u32, &prop->selector); + return 0; +} + +int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg) +{ + READ_START(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE); + return 0; +} + +int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt) +{ + 
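+    /* The returned stream list points directly into the reply buffer (no
+     * copy is made), so callers must consume it before freeing the reply
+     * with aaudio_reply_free(). */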
READ_START(AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE); + READ_VAR(u64, str_cnt); + *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); + /* offset += str_cnt * sizeof(aaudio_object_id_t); */ + return 0; +} + +int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt) +{ + READ_START(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE); + READ_VAR(u64, str_cnt); + *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); + /* offset += str_cnt * sizeof(aaudio_object_id_t); */ + return 0; +} + +int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg) +{ + READ_START(AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE); + return 0; +} + +int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt) +{ + READ_START(AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE); + READ_VAR(u64, dev_cnt); + *dev_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); + /* offset += dev_cnt * sizeof(aaudio_device_id_t); */ + return 0; +} + +#define WRITE_START_OF_TYPE(typev, devid) \ + size_t offset = sizeof(struct aaudio_msg_header); (void) offset; \ + ((struct aaudio_msg_header *) msg->data)->type = (typev); \ + ((struct aaudio_msg_header *) msg->data)->device_id = (devid); +#define WRITE_START_COMMAND(devid) WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_COMMAND, devid) +#define WRITE_START_RESPONSE() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_RESPONSE, 0) +#define WRITE_START_NOTIFICATION() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_NOTIFICATION, 0) +#define WRITE_VAL(type, value) { *((type *) ((u8 *) msg->data + offset)) = value; offset += sizeof(value); } +#define WRITE_BIN(value, size) { memcpy((u8 *) msg->data + offset, value, size); offset += size; } +#define WRITE_BASE(type) WRITE_VAL(u32, type) WRITE_VAL(u32, 0) +#define WRITE_END() { msg->size = offset; } + +void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev) +{ + WRITE_START_COMMAND(dev); + WRITE_BASE(AAUDIO_MSG_START_IO); + WRITE_END(); +} + +void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev) +{ + WRITE_START_COMMAND(dev); + WRITE_BASE(AAUDIO_MSG_STOP_IO); + WRITE_END(); +} + +void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size) +{ + WRITE_START_COMMAND(dev); + WRITE_BASE(AAUDIO_MSG_GET_PROPERTY); + WRITE_VAL(aaudio_object_id_t, obj); + WRITE_VAL(u32, prop.element); + WRITE_VAL(u32, prop.scope); + WRITE_VAL(u32, prop.selector); + WRITE_VAL(u64, qualifier_size); + WRITE_BIN(qualifier, qualifier_size); + WRITE_END(); +} + +void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size) +{ + WRITE_START_COMMAND(dev); + WRITE_BASE(AAUDIO_MSG_SET_PROPERTY); + WRITE_VAL(aaudio_object_id_t, obj); + WRITE_VAL(u32, prop.element); + WRITE_VAL(u32, prop.scope); + WRITE_VAL(u32, prop.selector); + WRITE_VAL(u64, data_size); + WRITE_BIN(data, data_size); + WRITE_VAL(u64, qualifier_size); + WRITE_BIN(qualifier, qualifier_size); + WRITE_END(); +} + +void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, + struct aaudio_prop_addr prop) +{ + WRITE_START_COMMAND(dev); + WRITE_BASE(AAUDIO_MSG_PROPERTY_LISTENER); + WRITE_VAL(aaudio_object_id_t, obj); + WRITE_VAL(u32, prop.element); + WRITE_VAL(u32, prop.scope); + WRITE_VAL(u32, 
prop.selector); + WRITE_END(); +} + +void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid) +{ + WRITE_START_COMMAND(devid); + WRITE_BASE(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES); + WRITE_END(); +} + +void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid) +{ + WRITE_START_COMMAND(devid); + WRITE_BASE(AAUDIO_MSG_GET_INPUT_STREAM_LIST); + WRITE_END(); +} + +void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid) +{ + WRITE_START_COMMAND(devid); + WRITE_BASE(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST); + WRITE_END(); +} + +void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode) +{ + WRITE_START_COMMAND(0); + WRITE_BASE(AAUDIO_MSG_SET_REMOTE_ACCESS); + WRITE_VAL(u64, mode); + WRITE_END(); +} + +void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver) +{ + WRITE_START_NOTIFICATION(); + WRITE_BASE(AAUDIO_MSG_NOTIFICATION_ALIVE); + WRITE_VAL(u32, proto_ver); + WRITE_VAL(u32, msg_ver); + WRITE_END(); +} + +void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg) +{ + WRITE_START_RESPONSE(); + WRITE_BASE(AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE); + WRITE_END(); +} + +void aaudio_msg_write_get_device_list(struct aaudio_msg *msg) +{ + WRITE_START_COMMAND(0); + WRITE_BASE(AAUDIO_MSG_GET_DEVICE_LIST); + WRITE_END(); +} + +#define CMD_SHARED_VARS_NO_REPLY \ + int status = 0; \ + struct aaudio_send_ctx sctx; +#define CMD_SHARED_VARS \ + CMD_SHARED_VARS_NO_REPLY \ + struct aaudio_msg reply = aaudio_reply_alloc(); \ + struct aaudio_msg *buf = &reply; +#define CMD_SEND_REQUEST(fn, ...) \ + if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) \ + return status; +#define CMD_DEF_SHARED_AND_SEND(fn, ...) \ + CMD_SHARED_VARS \ + CMD_SEND_REQUEST(fn, ##__VA_ARGS__); +#define CMD_DEF_SHARED_NO_REPLY_AND_SEND(fn, ...) \ + CMD_SHARED_VARS_NO_REPLY \ + CMD_SEND_REQUEST(fn, ##__VA_ARGS__); +#define CMD_HNDL_REPLY_NO_FREE(fn, ...) \ + status = fn(buf, ##__VA_ARGS__); \ + return status; +#define CMD_HNDL_REPLY_AND_FREE(fn, ...) 
\ + status = fn(buf, ##__VA_ARGS__); \ + aaudio_reply_free(&reply); \ + return status; + +int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid) +{ + CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_start_io, devid); + CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_start_io_response); +} +int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid) +{ + CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_stop_io, devid); + CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_stop_io_response); +} +int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf, + aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size) +{ + CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_property, devid, obj, prop, qualifier, qualifier_size); + CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_property_response, &obj, &prop, data, data_size); +} +int aaudio_cmd_get_primitive_property(struct aaudio_device *a, + aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size) +{ + int status; + struct aaudio_msg reply = aaudio_reply_alloc(); + void *r_data; + u64 r_data_size; + if ((status = aaudio_cmd_get_property(a, &reply, devid, obj, prop, qualifier, qualifier_size, + &r_data, &r_data_size))) + goto finish; + if (r_data_size != data_size) { + status = -EINVAL; + goto finish; + } + memcpy(data, r_data, data_size); +finish: + aaudio_reply_free(&reply); + return status; +} +int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size) +{ + CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_property, devid, obj, prop, data, data_size, + qualifier, qualifier_size); + CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_property_response, &obj); +} +int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop) +{ + CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_property_listener, devid, obj, prop); + CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_property_listener_response, &obj, &prop); +} +int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid) +{ + CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_input_stream_address_ranges, devid); + CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_input_stream_address_ranges_response); +} +int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, + aaudio_object_id_t **str_l, u64 *str_cnt) +{ + CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_input_stream_list, devid); + CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_input_stream_list_response, str_l, str_cnt); +} +int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, + aaudio_object_id_t **str_l, u64 *str_cnt) +{ + CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_output_stream_list, devid); + CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_output_stream_list_response, str_l, str_cnt); +} +int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode) +{ + CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_remote_access, mode); + CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_remote_access_response); +} +int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf, + aaudio_device_id_t **dev_l, u64 
*dev_cnt) +{ + CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_device_list); + CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_device_list_response, dev_l, dev_cnt); +} \ No newline at end of file diff --git a/drivers/staging/apple-bce/audio/protocol.h b/drivers/staging/apple-bce/audio/protocol.h new file mode 100644 index 000000000000..3427486f3f57 --- /dev/null +++ b/drivers/staging/apple-bce/audio/protocol.h @@ -0,0 +1,147 @@ +#ifndef AAUDIO_PROTOCOL_H +#define AAUDIO_PROTOCOL_H + +#include + +struct aaudio_device; + +typedef u64 aaudio_device_id_t; +typedef u64 aaudio_object_id_t; + +struct aaudio_msg { + void *data; + size_t size; +}; + +struct __attribute__((packed)) aaudio_msg_header { + char tag[4]; + u8 type; + aaudio_device_id_t device_id; // Idk, use zero for commands? +}; +struct __attribute__((packed)) aaudio_msg_base { + u32 msg; + u32 status; +}; + +struct aaudio_prop_addr { + u32 scope; + u32 selector; + u32 element; +}; +#define AAUDIO_PROP(scope, sel, el) (struct aaudio_prop_addr) { scope, sel, el } + +enum { + AAUDIO_MSG_TYPE_COMMAND = 1, + AAUDIO_MSG_TYPE_RESPONSE = 2, + AAUDIO_MSG_TYPE_NOTIFICATION = 3 +}; + +enum { + AAUDIO_MSG_START_IO = 0, + AAUDIO_MSG_START_IO_RESPONSE = 1, + AAUDIO_MSG_STOP_IO = 2, + AAUDIO_MSG_STOP_IO_RESPONSE = 3, + AAUDIO_MSG_UPDATE_TIMESTAMP = 4, + AAUDIO_MSG_GET_PROPERTY = 7, + AAUDIO_MSG_GET_PROPERTY_RESPONSE = 8, + AAUDIO_MSG_SET_PROPERTY = 9, + AAUDIO_MSG_SET_PROPERTY_RESPONSE = 10, + AAUDIO_MSG_PROPERTY_LISTENER = 11, + AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE = 12, + AAUDIO_MSG_PROPERTY_CHANGED = 13, + AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES = 18, + AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE = 19, + AAUDIO_MSG_GET_INPUT_STREAM_LIST = 24, + AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE = 25, + AAUDIO_MSG_GET_OUTPUT_STREAM_LIST = 26, + AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE = 27, + AAUDIO_MSG_SET_REMOTE_ACCESS = 32, + AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE = 33, + AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE = 34, + + AAUDIO_MSG_NOTIFICATION_ALIVE = 100, + AAUDIO_MSG_GET_DEVICE_LIST = 101, + AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE = 102, + AAUDIO_MSG_NOTIFICATION_BOOT = 104 +}; + +enum { + AAUDIO_REMOTE_ACCESS_OFF = 0, + AAUDIO_REMOTE_ACCESS_ON = 2 +}; + +enum { + AAUDIO_PROP_SCOPE_GLOBAL = 0x676c6f62, // 'glob' + AAUDIO_PROP_SCOPE_INPUT = 0x696e7074, // 'inpt' + AAUDIO_PROP_SCOPE_OUTPUT = 0x6f757470 // 'outp' +}; + +enum { + AAUDIO_PROP_UID = 0x75696420, // 'uid ' + AAUDIO_PROP_BOOL_VALUE = 0x6263766c, // 'bcvl' + AAUDIO_PROP_JACK_PLUGGED = 0x6a61636b, // 'jack' + AAUDIO_PROP_SEL_VOLUME = 0x64656176, // 'deav' + AAUDIO_PROP_LATENCY = 0x6c746e63, // 'ltnc' + AAUDIO_PROP_PHYS_FORMAT = 0x70667420 // 'pft ' +}; + +int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base); + +int aaudio_msg_read_start_io_response(struct aaudio_msg *msg); +int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg); +int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid, + u64 *timestamp, u64 *update_seed); +int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, + struct aaudio_prop_addr *prop, void **data, u64 *data_size); +int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj); +int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg,aaudio_object_id_t *obj, + struct aaudio_prop_addr *prop); +int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj, + struct 
aaudio_prop_addr *prop); +int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg); +int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt); +int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt); +int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg); +int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt); + +void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev); +void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev); +void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size); +void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size); +void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, + struct aaudio_prop_addr prop); +void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid); +void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid); +void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid); +void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode); +void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver); +void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg); +void aaudio_msg_write_get_device_list(struct aaudio_msg *msg); + + +int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid); +int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid); +int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf, + aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size); +int aaudio_cmd_get_primitive_property(struct aaudio_device *a, + aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size); +int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size); +int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, + struct aaudio_prop_addr prop); +int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid); +int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, + aaudio_object_id_t **str_l, u64 *str_cnt); +int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, + aaudio_object_id_t **str_l, u64 *str_cnt); +int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode); +int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf, + aaudio_device_id_t **dev_l, u64 *dev_cnt); + + + +#endif //AAUDIO_PROTOCOL_H diff --git a/drivers/staging/apple-bce/audio/protocol_bce.c b/drivers/staging/apple-bce/audio/protocol_bce.c new file mode 
100644 index 000000000000..28f2dfd44d67 --- /dev/null +++ b/drivers/staging/apple-bce/audio/protocol_bce.c @@ -0,0 +1,226 @@ +#include "protocol_bce.h" + +#include "audio.h" + +static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq); +static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq); +static int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, + bce_sq_completion cfn); +void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count); + +int aaudio_bce_init(struct aaudio_device *dev) +{ + int status; + struct aaudio_bce *bce = &dev->bcem; + bce->cq = bce_create_cq(dev->bce, 0x80); + spin_lock_init(&bce->spinlock); + if (!bce->cq) + return -EINVAL; + if ((status = aaudio_bce_queue_init(dev, &bce->qout, "com.apple.BridgeAudio.IntelToARM", DMA_TO_DEVICE, + aaudio_bce_out_queue_completion))) { + return status; + } + if ((status = aaudio_bce_queue_init(dev, &bce->qin, "com.apple.BridgeAudio.ARMToIntel", DMA_FROM_DEVICE, + aaudio_bce_in_queue_completion))) { + return status; + } + aaudio_bce_in_queue_submit_pending(&bce->qin, bce->qin.el_count); + return 0; +} + +int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, + bce_sq_completion cfn) +{ + q->cq = dev->bcem.cq; + q->el_size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE; + q->el_count = AAUDIO_BCE_QUEUE_ELEMENT_COUNT; + /* NOTE: The Apple impl uses 0x80 as the queue size, however we use 21 (in fact 20) to simplify the impl */ + q->sq = bce_create_sq(dev->bce, q->cq, name, (u32) (q->el_count + 1), direction, cfn, dev); + if (!q->sq) + return -EINVAL; + + q->data = dma_alloc_coherent(&dev->bce->pci->dev, q->el_size * q->el_count, &q->dma_addr, GFP_KERNEL); + if (!q->data) { + bce_destroy_sq(dev->bce, q->sq); + return -EINVAL; + } + return 0; +} + +static void aaudio_send_create_tag(struct aaudio_bce *b, int *tagn, char tag[4]) +{ + char tag_zero[5]; + b->tag_num = (b->tag_num + 1) % AAUDIO_BCE_QUEUE_TAG_COUNT; + *tagn = b->tag_num; + snprintf(tag_zero, 5, "S%03d", b->tag_num); + *((u32 *) tag) = *((u32 *) tag_zero); +} + +int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag) +{ + int status; + size_t index; + void *dptr; + struct aaudio_msg_header *header; + if ((status = bce_reserve_submission(b->qout.sq, &ctx->timeout))) + return status; + spin_lock_irqsave(&b->spinlock, ctx->irq_flags); + index = b->qout.data_tail; + dptr = (u8 *) b->qout.data + index * b->qout.el_size; + ctx->msg.data = dptr; + header = dptr; + if (tag) + *((u32 *) header->tag) = *((u32 *) tag); + else + aaudio_send_create_tag(b, &ctx->tag_n, header->tag); + return 0; +} + +void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx) +{ + struct bce_qe_submission *s = bce_next_submission(b->qout.sq); +#ifdef DEBUG + pr_debug("aaudio: Sending command data\n"); + print_hex_dump(KERN_DEBUG, "aaudio:OUT ", DUMP_PREFIX_NONE, 32, 1, ctx->msg.data, ctx->msg.size, true); +#endif + bce_set_submission_single(s, b->qout.dma_addr + (dma_addr_t) (ctx->msg.data - b->qout.data), ctx->msg.size); + bce_submit_to_device(b->qout.sq); + b->qout.data_tail = (b->qout.data_tail + 1) % b->qout.el_count; + spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags); +} + +int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply) +{ + struct aaudio_bce_queue_entry ent; + DECLARE_COMPLETION_ONSTACK(cmpl); + ent.msg = reply; + ent.cmpl = &cmpl; + 
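+    /* Register the waiter under the tag before the message goes out, so the
+     * completion handler can match the reply even if it arrives before
+     * wait_for_completion_timeout() is entered. The spinlock taken in
+     * __aaudio_send_prepare() is still held at this point. */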
b->pending_entries[ctx->tag_n] = &ent; + __aaudio_send(b, ctx); /* unlocks the spinlock */ + ctx->timeout = wait_for_completion_timeout(&cmpl, ctx->timeout); + if (ctx->timeout == 0) { + /* Remove the pending queue entry; this will be normally handled by the completion route but + * during a timeout it won't */ + spin_lock_irqsave(&b->spinlock, ctx->irq_flags); + if (b->pending_entries[ctx->tag_n] == &ent) + b->pending_entries[ctx->tag_n] = NULL; + spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags); + return -ETIMEDOUT; + } + return 0; +} + +static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply) +{ + const char *tag; + int tagn; + unsigned long irq_flags; + char tag_zero[5]; + struct aaudio_bce_queue_entry *entry; + + tag = ((struct aaudio_msg_header *) reply->data)->tag; + if (tag[0] != 'S') { + pr_err("aaudio_handle_reply: Unexpected tag: %.4s\n", tag); + return; + } + *((u32 *) tag_zero) = *((u32 *) tag); + tag_zero[4] = 0; + if (kstrtoint(&tag_zero[1], 10, &tagn)) { + pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag); + return; + } + + spin_lock_irqsave(&b->spinlock, irq_flags); + entry = b->pending_entries[tagn]; + if (entry) { + if (reply->size < entry->msg->size) + entry->msg->size = reply->size; + memcpy(entry->msg->data, reply->data, entry->msg->size); + complete(entry->cmpl); + + b->pending_entries[tagn] = NULL; + } else { + pr_err("aaudio_handle_reply: No queued item found for tag: %.4s\n", tag); + } + spin_unlock_irqrestore(&b->spinlock, irq_flags); +} + +static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq) +{ + while (bce_next_completion(sq)) { + //pr_info("aaudio: Send confirmed\n"); + bce_notify_submission_complete(sq); + } +} + +static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg); + +static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq) +{ + struct aaudio_msg msg; + struct aaudio_device *dev = sq->userdata; + struct aaudio_bce_queue *q = &dev->bcem.qin; + struct bce_sq_completion_data *c; + size_t cnt = 0; + + mb(); + while ((c = bce_next_completion(sq))) { + msg.data = (u8 *) q->data + q->data_head * q->el_size; + msg.size = c->data_size; +#ifdef DEBUG + pr_debug("aaudio: Received command data %llx\n", c->data_size); + print_hex_dump(KERN_DEBUG, "aaudio:IN ", DUMP_PREFIX_NONE, 32, 1, msg.data, min(msg.size, 128UL), true); +#endif + aaudio_bce_in_queue_handle_msg(dev, &msg); + + q->data_head = (q->data_head + 1) % q->el_count; + + bce_notify_submission_complete(sq); + ++cnt; + } + aaudio_bce_in_queue_submit_pending(q, cnt); +} + +static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg) +{ + struct aaudio_msg_header *header = (struct aaudio_msg_header *) msg->data; + if (msg->size < sizeof(struct aaudio_msg_header)) { + pr_err("aaudio: Msg size smaller than header (%lx)", msg->size); + return; + } + if (header->type == AAUDIO_MSG_TYPE_RESPONSE) { + aaudio_handle_reply(&a->bcem, msg); + } else if (header->type == AAUDIO_MSG_TYPE_COMMAND) { + aaudio_handle_command(a, msg); + } else if (header->type == AAUDIO_MSG_TYPE_NOTIFICATION) { + aaudio_handle_notification(a, msg); + } +} + +void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count) +{ + struct bce_qe_submission *s; + while (count--) { + if (bce_reserve_submission(q->sq, NULL)) { + pr_err("aaudio: Failed to reserve an event queue submission\n"); + break; + } + s = bce_next_submission(q->sq); + bce_set_submission_single(s, q->dma_addr + (dma_addr_t) 
(q->data_tail * q->el_size), q->el_size); + q->data_tail = (q->data_tail + 1) % q->el_count; + } + bce_submit_to_device(q->sq); +} + +struct aaudio_msg aaudio_reply_alloc(void) +{ + struct aaudio_msg ret; + ret.size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE; + ret.data = kmalloc(ret.size, GFP_KERNEL); + return ret; +} + +void aaudio_reply_free(struct aaudio_msg *reply) +{ + kfree(reply->data); +} diff --git a/drivers/staging/apple-bce/audio/protocol_bce.h b/drivers/staging/apple-bce/audio/protocol_bce.h new file mode 100644 index 000000000000..14d26c05ddf9 --- /dev/null +++ b/drivers/staging/apple-bce/audio/protocol_bce.h @@ -0,0 +1,72 @@ +#ifndef AAUDIO_PROTOCOL_BCE_H +#define AAUDIO_PROTOCOL_BCE_H + +#include "protocol.h" +#include "../queue.h" + +#define AAUDIO_BCE_QUEUE_ELEMENT_SIZE 0x1000 +#define AAUDIO_BCE_QUEUE_ELEMENT_COUNT 20 + +#define AAUDIO_BCE_QUEUE_TAG_COUNT 1000 + +struct aaudio_device; + +struct aaudio_bce_queue_entry { + struct aaudio_msg *msg; + struct completion *cmpl; +}; +struct aaudio_bce_queue { + struct bce_queue_cq *cq; + struct bce_queue_sq *sq; + void *data; + dma_addr_t dma_addr; + size_t data_head, data_tail; + size_t el_size, el_count; +}; +struct aaudio_bce { + struct bce_queue_cq *cq; + struct aaudio_bce_queue qin; + struct aaudio_bce_queue qout; + int tag_num; + struct aaudio_bce_queue_entry *pending_entries[AAUDIO_BCE_QUEUE_TAG_COUNT]; + struct spinlock spinlock; +}; + +struct aaudio_send_ctx { + int status; + int tag_n; + unsigned long irq_flags; + struct aaudio_msg msg; + unsigned long timeout; +}; + +int aaudio_bce_init(struct aaudio_device *dev); +int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag); +void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx); +int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply); + +#define aaudio_send_with_tag(a, ctx, tag, tout, fn, ...) ({ \ + (ctx)->timeout = msecs_to_jiffies(tout); \ + (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), (tag)); \ + if (!(ctx)->status) { \ + fn(&(ctx)->msg, ##__VA_ARGS__); \ + __aaudio_send(&(a)->bcem, (ctx)); \ + } \ + (ctx)->status; \ +}) +#define aaudio_send(a, ctx, tout, fn, ...) aaudio_send_with_tag(a, ctx, NULL, tout, fn, ##__VA_ARGS__) + +#define aaudio_send_cmd_sync(a, ctx, reply, tout, fn, ...) 
({ \
+    (ctx)->timeout = msecs_to_jiffies(tout); \
+    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), NULL); \
+    if (!(ctx)->status) { \
+        fn(&(ctx)->msg, ##__VA_ARGS__); \
+        (ctx)->status = __aaudio_send_cmd_sync(&(a)->bcem, (ctx), (reply)); \
+    } \
+    (ctx)->status; \
+})
+
+struct aaudio_msg aaudio_reply_alloc(void);
+void aaudio_reply_free(struct aaudio_msg *reply);
+
+#endif //AAUDIO_PROTOCOL_BCE_H
diff --git a/drivers/staging/apple-bce/mailbox.c b/drivers/staging/apple-bce/mailbox.c
new file mode 100644
index 000000000000..07ce11928988
--- /dev/null
+++ b/drivers/staging/apple-bce/mailbox.c
@@ -0,0 +1,155 @@
+#include "mailbox.h"
+#include <linux/atomic.h>
+#include "apple_bce.h"
+#include <linux/io.h>
+
+#define REG_MBOX_OUT_BASE 0x820
+#define REG_MBOX_REPLY_COUNTER 0x108
+#define REG_MBOX_REPLY_BASE 0x810
+#define REG_TIMESTAMP_BASE 0xC000
+
+#define BCE_MBOX_TIMEOUT_MS 200
+
+void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb)
+{
+    mb->reg_mb = reg_mb;
+    init_completion(&mb->mb_completion);
+}
+
+int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64 *recv)
+{
+    u32 __iomem *regb;
+
+    if (atomic_cmpxchg(&mb->mb_status, 0, 1) != 0) {
+        return -EEXIST; // We don't support two messages at once
+    }
+    reinit_completion(&mb->mb_completion);
+
+    pr_debug("bce_mailbox_send: %llx\n", msg);
+    regb = (u32 *) ((u8 *) mb->reg_mb + REG_MBOX_OUT_BASE);
+    iowrite32((u32) msg, regb);
+    iowrite32((u32) (msg >> 32), regb + 1);
+    iowrite32(0, regb + 2);
+    iowrite32(0, regb + 3);
+
+    wait_for_completion_timeout(&mb->mb_completion, msecs_to_jiffies(BCE_MBOX_TIMEOUT_MS));
+    if (atomic_read(&mb->mb_status) != 2) { // Didn't get the reply
+        atomic_set(&mb->mb_status, 0);
+        return -ETIMEDOUT;
+    }
+
+    *recv = mb->mb_result;
+    pr_debug("bce_mailbox_send: reply %llx\n", *recv);
+
+    atomic_set(&mb->mb_status, 0);
+    return 0;
+}
+
+static int bce_mailbox_retrieve_response(struct bce_mailbox *mb)
+{
+    u32 __iomem *regb;
+    u32 lo, hi;
+    int count, counter;
+    u32 res = ioread32((u8 *) mb->reg_mb + REG_MBOX_REPLY_COUNTER);
+    count = (res >> 20) & 0xf;
+    counter = count;
+    pr_debug("bce_mailbox_retrieve_response count=%i\n", count);
+    while (counter--) {
+        regb = (u32 *) ((u8 *) mb->reg_mb + REG_MBOX_REPLY_BASE);
+        lo = ioread32(regb);
+        hi = ioread32(regb + 1);
+        ioread32(regb + 2);
+        ioread32(regb + 3);
+        pr_debug("bce_mailbox_retrieve_response %llx\n", ((u64) hi << 32) | lo);
+        mb->mb_result = ((u64) hi << 32) | lo;
+    }
+    return count > 0 ? 0 : -ENODATA;
+}
+
+int bce_mailbox_handle_interrupt(struct bce_mailbox *mb)
+{
+    int status = bce_mailbox_retrieve_response(mb);
+    if (!status) {
+        atomic_set(&mb->mb_status, 2);
+        complete(&mb->mb_completion);
+    }
+    return status;
+}
+
+static void bc_send_timestamp(struct timer_list *tl);
+
+void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg)
+{
+    u32 __iomem *regb;
+
+    spin_lock_init(&ts->stop_sl);
+    ts->stopped = false;
+
+    ts->reg = reg;
+
+    regb = (u32 *) ((u8 *) ts->reg + REG_TIMESTAMP_BASE);
+
+    ioread32(regb);
+    mb();
+
+    timer_setup(&ts->timer, bc_send_timestamp, 0);
+}
+
+void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial)
+{
+    unsigned long flags;
+    u32 __iomem *regb = (u32 *) ((u8 *) ts->reg + REG_TIMESTAMP_BASE);
+
+    if (is_initial) {
+        iowrite32((u32) -4, regb + 2);
+        iowrite32((u32) -1, regb);
+    } else {
+        iowrite32((u32) -3, regb + 2);
+        iowrite32((u32) -1, regb);
+    }
+
+    spin_lock_irqsave(&ts->stop_sl, flags);
+    ts->stopped = false;
+    spin_unlock_irqrestore(&ts->stop_sl, flags);
+    mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
+}
+
+void bce_timestamp_stop(struct bce_timestamp *ts)
+{
+    unsigned long flags;
+    u32 __iomem *regb = (u32 *) ((u8 *) ts->reg + REG_TIMESTAMP_BASE);
+
+    spin_lock_irqsave(&ts->stop_sl, flags);
+    ts->stopped = true;
+    spin_unlock_irqrestore(&ts->stop_sl, flags);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,15,0)
+    del_timer_sync(&ts->timer);
+#else
+    timer_delete_sync(&ts->timer);
+#endif
+    iowrite32((u32) -2, regb + 2);
+    iowrite32((u32) -1, regb);
+}
+
+static void bc_send_timestamp(struct timer_list *tl)
+{
+    struct bce_timestamp *ts;
+    unsigned long flags;
+    u32 __iomem *regb;
+    ktime_t bt;
+
+    ts = container_of(tl, struct bce_timestamp, timer);
+    regb = (u32 *) ((u8 *) ts->reg + REG_TIMESTAMP_BASE);
+    local_irq_save(flags);
+    ioread32(regb + 2);
+    mb();
+    bt = ktime_get_boottime();
+    iowrite32((u32) bt, regb + 2);
+    iowrite32((u32) (bt >> 32), regb);
+
+    spin_lock(&ts->stop_sl);
+    if (!ts->stopped)
+        mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
+    spin_unlock(&ts->stop_sl);
+    local_irq_restore(flags);
+}
diff --git a/drivers/staging/apple-bce/mailbox.h b/drivers/staging/apple-bce/mailbox.h
new file mode 100644
index 000000000000..f3323f95ba51
--- /dev/null
+++ b/drivers/staging/apple-bce/mailbox.h
@@ -0,0 +1,53 @@
+#ifndef BCE_MAILBOX_H
+#define BCE_MAILBOX_H
+
+#include <linux/completion.h>
+#include <linux/atomic.h>
+#include <linux/timer.h>
+
+struct bce_mailbox {
+    void __iomem *reg_mb;
+
+    atomic_t mb_status; // possible statuses: 0 (no msg), 1 (has active msg), 2 (got reply)
+    struct completion mb_completion;
+    uint64_t mb_result;
+};
+
+enum bce_message_type {
+    BCE_MB_REGISTER_COMMAND_SQ = 0x7,           // to-device
+    BCE_MB_REGISTER_COMMAND_CQ = 0x8,           // to-device
+    BCE_MB_REGISTER_COMMAND_QUEUE_REPLY = 0xB,  // to-host
+    BCE_MB_SET_FW_PROTOCOL_VERSION = 0xC,       // both
+    BCE_MB_SLEEP_NO_STATE = 0x14,               // to-device
+    BCE_MB_RESTORE_NO_STATE = 0x15,             // to-device
+    BCE_MB_SAVE_STATE_AND_SLEEP = 0x17,         // to-device
+    BCE_MB_RESTORE_STATE_AND_WAKE = 0x18,       // to-device
+    BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19, // from-device
+    BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A,  // from-device
+};
+
+#define BCE_MB_MSG(type, value) (((u64) (type) << 58) | ((value) & 0x3FFFFFFFFFFFFFFLL))
+#define BCE_MB_TYPE(v) ((u32) (v >> 58))
+#define BCE_MB_VALUE(v) (v & 0x3FFFFFFFFFFFFFFLL)
+
+void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb);
+
+int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64 *recv);
+
+int bce_mailbox_handle_interrupt(struct bce_mailbox
*mb);
+
+
+struct bce_timestamp {
+    void __iomem *reg;
+    struct timer_list timer;
+    struct spinlock stop_sl;
+    bool stopped;
+};
+
+void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg);
+
+void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial);
+
+void bce_timestamp_stop(struct bce_timestamp *ts);
+
+#endif //BCE_MAILBOX_H
diff --git a/drivers/staging/apple-bce/queue.c b/drivers/staging/apple-bce/queue.c
new file mode 100644
index 000000000000..948286157e3a
--- /dev/null
+++ b/drivers/staging/apple-bce/queue.c
@@ -0,0 +1,415 @@
+#include "queue.h"
+#include "apple_bce.h"
+#include <linux/dma-mapping.h>
+
+#define REG_DOORBELL_BASE 0x44000
+
+struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count)
+{
+    struct bce_queue_cq *q;
+    q = kzalloc(sizeof(struct bce_queue_cq), GFP_KERNEL);
+    if (!q)
+        return NULL;
+    q->qid = qid;
+    q->type = BCE_QUEUE_CQ;
+    q->el_count = el_count;
+    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * sizeof(struct bce_qe_completion),
+            &q->dma_handle, GFP_KERNEL);
+    if (!q->data) {
+        pr_err("DMA queue memory alloc failed\n");
+        kfree(q);
+        return NULL;
+    }
+    return q;
+}
+
+void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
+{
+    cfg->qid = (u16) cq->qid;
+    cfg->el_count = (u16) cq->el_count;
+    cfg->vector_or_cq = 0;
+    cfg->_pad = 0;
+    cfg->addr = cq->dma_handle;
+    cfg->length = cq->el_count * sizeof(struct bce_qe_completion);
+}
+
+void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
+{
+    dma_free_coherent(&dev->pci->dev, cq->el_count * sizeof(struct bce_qe_completion), cq->data, cq->dma_handle);
+    kfree(cq);
+}
+
+static void bce_handle_cq_completion(struct apple_bce_device *dev, struct bce_qe_completion *e, size_t *ce)
+{
+    struct bce_queue *target;
+    struct bce_queue_sq *target_sq;
+    struct bce_sq_completion_data *cmpl;
+    if (e->qid >= BCE_MAX_QUEUE_COUNT) {
+        pr_err("Device sent a response for qid (%u) >= BCE_MAX_QUEUE_COUNT\n", e->qid);
+        return;
+    }
+    target = dev->queues[e->qid];
+    if (!target || target->type != BCE_QUEUE_SQ) {
+        pr_err("Device sent a response for qid (%u), which does not exist\n", e->qid);
+        return;
+    }
+    target_sq = (struct bce_queue_sq *) target;
+    if (target_sq->completion_tail != e->completion_index) {
+        pr_err("Completion index mismatch; this is likely going to make this driver unusable\n");
+        return;
+    }
+    if (!target_sq->has_pending_completions) {
+        target_sq->has_pending_completions = true;
+        dev->int_sq_list[(*ce)++] = target_sq;
+    }
+    cmpl = &target_sq->completion_data[e->completion_index];
+    cmpl->status = e->status;
+    cmpl->data_size = e->data_size;
+    cmpl->result = e->result;
+    wmb();
+    target_sq->completion_tail = (target_sq->completion_tail + 1) % target_sq->el_count;
+}
+
+void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq)
+{
+    size_t ce = 0;
+    struct bce_qe_completion *e;
+    struct bce_queue_sq *sq;
+    e = bce_cq_element(cq, cq->index);
+    if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
+        return;
+    mb();
+    while (true) {
+        e = bce_cq_element(cq, cq->index);
+        if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
+            break;
+        // pr_info("apple-bce: compl: %i: %i %llx %llx", e->qid, e->status, e->data_size, e->result);
+        bce_handle_cq_completion(dev, e, &ce);
+        e->flags = 0;
+        cq->index = (cq->index + 1) % cq->el_count;
+    }
+    mb();
+    iowrite32(cq->index, (u32 *) ((u8 *) dev->reg_mem_dma + REG_DOORBELL_BASE) + cq->qid);
+    while (ce) {
+        --ce;
+        sq = dev->int_sq_list[ce];
+        sq->completion(sq);
+        sq->has_pending_completions = false;
+    }
+}
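+
+/*
+ * Usage sketch (illustrative only, not exercised by this file): a client
+ * typically pairs one CQ with one or more SQs and drives them like this.
+ * The names mirror the helpers declared in queue.h; my_completion, dma_addr
+ * and len are placeholders, and error handling is elided.
+ *
+ *     struct bce_queue_cq *cq = bce_create_cq(dev, 0x100);
+ *     struct bce_queue_sq *sq = bce_create_sq(dev, cq, "queue-name", 0x100,
+ *                                             DMA_TO_DEVICE, my_completion, NULL);
+ *     unsigned long timeout = msecs_to_jiffies(1000);
+ *
+ *     if (!bce_reserve_submission(sq, &timeout)) {
+ *         struct bce_qe_submission *s = bce_next_submission(sq);
+ *         bce_set_submission_single(s, dma_addr, len);
+ *         bce_submit_to_device(sq);
+ *     }
+ *
+ * The device then posts a bce_qe_completion into the CQ ring; the interrupt
+ * path calls bce_handle_cq_completions(), which rings the doorbell and calls
+ * my_completion(sq), where the client drains bce_next_completion(sq) and
+ * calls bce_notify_submission_complete(sq) for each entry.
+ */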
+
+
+struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
+        bce_sq_completion compl, void *userdata)
+{
+    struct bce_queue_sq *q;
+    q = kzalloc(sizeof(struct bce_queue_sq), GFP_KERNEL);
+    if (!q)
+        return NULL;
+    q->qid = qid;
+    q->type = BCE_QUEUE_SQ;
+    q->el_size = el_size;
+    q->el_count = el_count;
+    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * el_size,
+            &q->dma_handle, GFP_KERNEL);
+    q->completion = compl;
+    q->userdata = userdata;
+    q->completion_data = kzalloc(sizeof(struct bce_sq_completion_data) * el_count, GFP_KERNEL);
+    q->reg_mem_dma = dev->reg_mem_dma;
+    atomic_set(&q->available_commands, el_count - 1);
+    init_completion(&q->available_command_completion);
+    atomic_set(&q->available_command_completion_waiting_count, 0);
+    if (!q->data || !q->completion_data) {
+        pr_err("DMA queue memory alloc failed\n");
+        if (q->data)
+            dma_free_coherent(&dev->pci->dev, el_count * el_size, q->data, q->dma_handle);
+        kfree(q->completion_data);
+        kfree(q);
+        return NULL;
+    }
+    return q;
+}
+
+void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
+{
+    cfg->qid = (u16) sq->qid;
+    cfg->el_count = (u16) sq->el_count;
+    cfg->vector_or_cq = (u16) cq->qid;
+    cfg->_pad = 0;
+    cfg->addr = sq->dma_handle;
+    cfg->length = sq->el_count * sq->el_size;
+}
+
+void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
+{
+    dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle);
+    kfree(sq->completion_data);
+    kfree(sq);
+}
+
+int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout)
+{
+    while (atomic_dec_if_positive(&sq->available_commands) < 0) {
+        if (!timeout || !*timeout)
+            return -EAGAIN;
+        atomic_inc(&sq->available_command_completion_waiting_count);
+        *timeout = wait_for_completion_timeout(&sq->available_command_completion, *timeout);
+        if (!*timeout) {
+            if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) < 0)
+                try_wait_for_completion(&sq->available_command_completion); /* consume the pending completion */
+        }
+    }
+    return 0;
+}
+
+void bce_cancel_submission_reservation(struct bce_queue_sq *sq)
+{
+    atomic_inc(&sq->available_commands);
+}
+
+void *bce_next_submission(struct bce_queue_sq *sq)
+{
+    void *ret = bce_sq_element(sq, sq->tail);
+    sq->tail = (sq->tail + 1) % sq->el_count;
+    return ret;
+}
+
+void bce_submit_to_device(struct bce_queue_sq *sq)
+{
+    mb();
+    iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma + REG_DOORBELL_BASE) + sq->qid);
+}
+
+void bce_notify_submission_complete(struct bce_queue_sq *sq)
+{
+    sq->head = (sq->head + 1) % sq->el_count;
+    atomic_inc(&sq->available_commands);
+    if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) >= 0) {
+        complete(&sq->available_command_completion);
+    }
+}
+
+void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size)
+{
+    element->addr = addr;
+    element->length = size;
+    element->segl_addr = element->segl_length = 0;
+}
+
+static void bce_cmdq_completion(struct bce_queue_sq *q);
+
+struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count)
+{
+    struct bce_queue_cmdq *q;
+    q = kzalloc(sizeof(struct bce_queue_cmdq), GFP_KERNEL);
+    if (!q)
+        return NULL;
+    q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q);
+    if (!q->sq) {
+        kfree(q);
+        return NULL;
+    }
+    spin_lock_init(&q->lck);
+    q->tres = kzalloc(sizeof(struct bce_queue_cmdq_result_el *) * el_count, GFP_KERNEL);
+    if (!q->tres) {
+        bce_free_sq(dev, q->sq);
+        kfree(q);
+        return NULL;
+    }
+    return q;
+}
+
+void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq)
+{
+    bce_free_sq(dev, cmdq->sq);
+    kfree(cmdq->tres);
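+    /* entries of tres point at callers' stack-allocated result slots
+     * (see bce_cmd_start()); only the pointer array itself is owned here */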
kfree(cmdq);
+}
+
+static void bce_cmdq_completion(struct bce_queue_sq *q)
+{
+    struct bce_queue_cmdq_result_el *el;
+    struct bce_queue_cmdq *cmdq = q->userdata;
+    struct bce_sq_completion_data *result;
+
+    spin_lock(&cmdq->lck);
+    while ((result = bce_next_completion(q))) {
+        el = cmdq->tres[cmdq->sq->head];
+        if (el) {
+            el->result = result->result;
+            el->status = result->status;
+            mb();
+            complete(&el->cmpl);
+        } else {
+            pr_err("apple-bce: Unexpected command queue completion\n");
+        }
+        cmdq->tres[cmdq->sq->head] = NULL;
+        bce_notify_submission_complete(q);
+    }
+    spin_unlock(&cmdq->lck);
+}
+
+static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
+{
+    void *ret;
+    unsigned long timeout;
+    init_completion(&res->cmpl);
+    mb();
+
+    timeout = msecs_to_jiffies(1000L * 60 * 5); /* wait for up to ~5 minutes */
+    if (bce_reserve_submission(cmdq->sq, &timeout))
+        return NULL;
+
+    spin_lock(&cmdq->lck);
+    cmdq->tres[cmdq->sq->tail] = res;
+    ret = bce_next_submission(cmdq->sq);
+    return ret;
+}
+
+static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
+{
+    bce_submit_to_device(cmdq->sq);
+    spin_unlock(&cmdq->lck);
+
+    wait_for_completion(&res->cmpl);
+    mb();
+}
+
+u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout)
+{
+    struct bce_queue_cmdq_result_el res;
+    struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
+    if (!cmd)
+        return (u32) -1;
+    cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE;
+    cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 1 : 0));
+    cmd->qid = cfg->qid;
+    cmd->el_count = cfg->el_count;
+    cmd->vector_or_cq = cfg->vector_or_cq;
+    memset(cmd->name, 0, sizeof(cmd->name));
+    if (name) {
+        cmd->name_len = (u16) min(strlen(name), (size_t) sizeof(cmd->name));
+        memcpy(cmd->name, name, cmd->name_len);
+    } else {
+        cmd->name_len = 0;
+    }
+    cmd->addr = cfg->addr;
+    cmd->length = cfg->length;
+
+    bce_cmd_finish(cmdq, &res);
+    return res.status;
+}
+
+u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
+{
+    struct bce_queue_cmdq_result_el res;
+    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
+    if (!cmd)
+        return (u32) -1;
+    cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE;
+    cmd->flags = 0;
+    cmd->qid = qid;
+    bce_cmd_finish(cmdq, &res);
+    return res.status;
+}
+
+u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
+{
+    struct bce_queue_cmdq_result_el res;
+    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
+    if (!cmd)
+        return (u32) -1;
+    cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE;
+    cmd->flags = 0;
+    cmd->qid = qid;
+    bce_cmd_finish(cmdq, &res);
+    return res.status;
+}
+
+
+struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count)
+{
+    struct bce_queue_cq *cq;
+    struct bce_queue_memcfg cfg;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+    int qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
+#else
+    int qid = ida_alloc_range(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX - 1, GFP_KERNEL);
+#endif
+    if (qid < 0)
+        return NULL;
+    cq = bce_alloc_cq(dev, qid, el_count);
+    if (!cq) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+        ida_simple_remove(&dev->queue_ida, (uint) qid);
+#else
+        ida_free(&dev->queue_ida, (uint) qid);
+#endif
+        return NULL;
+    }
+    bce_get_cq_memcfg(cq, &cfg);
+    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) != 0) {
+        pr_err("apple-bce: CQ registration failed (%i)", qid);
+        bce_free_cq(dev, cq);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+        ida_simple_remove(&dev->queue_ida, (uint) qid);
+#else
+        ida_free(&dev->queue_ida, (uint) qid);
+#endif
+        return NULL;
+    }
+    spin_lock(&dev->queues_lock);
+    dev->queues[qid] = (struct bce_queue *) cq;
+    spin_unlock(&dev->queues_lock);
+    return cq;
+}
+
+struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
+        int direction, bce_sq_completion compl, void *userdata)
+{
+    struct bce_queue_sq *sq;
+    struct bce_queue_memcfg cfg;
+    int qid;
+    if (cq == NULL)
+        return NULL; /* cq can not be null */
+    if (name == NULL)
+        return NULL; /* name can not be null */
+    if (direction != DMA_TO_DEVICE && direction != DMA_FROM_DEVICE)
+        return NULL; /* unsupported direction */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+    qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
+#else
+    qid = ida_alloc_range(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX - 1, GFP_KERNEL);
+#endif
+    if (qid < 0)
+        return NULL;
+    sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata);
+    if (!sq) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+        ida_simple_remove(&dev->queue_ida, (uint) qid);
+#else
+        ida_free(&dev->queue_ida, (uint) qid);
+#endif
+        return NULL;
+    }
+    bce_get_sq_memcfg(sq, cq, &cfg);
+    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) != 0) {
+        pr_err("apple-bce: SQ registration failed (%i)", qid);
+        bce_free_sq(dev, sq);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+        ida_simple_remove(&dev->queue_ida, (uint) qid);
+#else
+        ida_free(&dev->queue_ida, (uint) qid);
+#endif
+        return NULL;
+    }
+    spin_lock(&dev->queues_lock);
+    dev->queues[qid] = (struct bce_queue *) sq;
+    spin_unlock(&dev->queues_lock);
+    return sq;
+}
+
+void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
+{
+    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid))
+        pr_err("apple-bce: CQ unregister failed");
+    spin_lock(&dev->queues_lock);
+    dev->queues[cq->qid] = NULL;
+    spin_unlock(&dev->queues_lock);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+    ida_simple_remove(&dev->queue_ida, (uint) cq->qid);
+#else
+    ida_free(&dev->queue_ida, (uint) cq->qid);
+#endif
+    bce_free_cq(dev, cq);
+}
+
+void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
+{
+    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid))
+        pr_err("apple-bce: SQ unregister failed");
+    spin_lock(&dev->queues_lock);
+    dev->queues[sq->qid] = NULL;
+    spin_unlock(&dev->queues_lock);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
+    ida_simple_remove(&dev->queue_ida, (uint) sq->qid);
+#else
+    ida_free(&dev->queue_ida, (uint) sq->qid);
+#endif
+    bce_free_sq(dev, sq);
+}
diff --git a/drivers/staging/apple-bce/queue.h b/drivers/staging/apple-bce/queue.h
new file mode 100644
index 000000000000..8368ac5dfca8
--- /dev/null
+++ b/drivers/staging/apple-bce/queue.h
@@ -0,0 +1,177 @@
+#ifndef BCE_QUEUE_H
+#define BCE_QUEUE_H
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+
+#define BCE_CMD_SIZE 0x40
+
+struct apple_bce_device;
+
+enum bce_queue_type {
+    BCE_QUEUE_CQ, BCE_QUEUE_SQ
+};
+struct bce_queue {
+    int qid;
+    int type;
+};
+struct bce_queue_cq {
+    int qid;
+    int type;
+    u32 el_count;
+    dma_addr_t dma_handle;
+    void *data;
+
+    u32 index;
+};
+struct bce_queue_sq;
+typedef void (*bce_sq_completion)(struct bce_queue_sq *q);
+struct bce_sq_completion_data {
+    u32 status;
+    u64 data_size;
+    u64 result;
+};
+struct bce_queue_sq {
+    int qid;
+    int type;
+    u32 el_size;
+    u32 el_count;
+    dma_addr_t dma_handle;
+    void *data;
+    void *userdata;
+    void __iomem *reg_mem_dma;
+
+    atomic_t available_commands;
+    struct completion
available_command_completion; + atomic_t available_command_completion_waiting_count; + u32 head, tail; + + u32 completion_cidx, completion_tail; + struct bce_sq_completion_data *completion_data; + bool has_pending_completions; + bce_sq_completion completion; +}; + +struct bce_queue_cmdq_result_el { + struct completion cmpl; + u32 status; + u64 result; +}; +struct bce_queue_cmdq { + struct bce_queue_sq *sq; + struct spinlock lck; + struct bce_queue_cmdq_result_el **tres; +}; + +struct bce_queue_memcfg { + u16 qid; + u16 el_count; + u16 vector_or_cq; + u16 _pad; + u64 addr; + u64 length; +}; + +enum bce_qe_completion_status { + BCE_COMPLETION_SUCCESS = 0, + BCE_COMPLETION_ERROR = 1, + BCE_COMPLETION_ABORTED = 2, + BCE_COMPLETION_NO_SPACE = 3, + BCE_COMPLETION_OVERRUN = 4 +}; +enum bce_qe_completion_flags { + BCE_COMPLETION_FLAG_PENDING = 0x8000 +}; +struct bce_qe_completion { + u64 result; + u64 data_size; + u16 qid; + u16 completion_index; + u16 status; // bce_qe_completion_status + u16 flags; // bce_qe_completion_flags +}; + +struct bce_qe_submission { + u64 length; + u64 addr; + + u64 segl_addr; + u64 segl_length; +}; + +enum bce_cmdq_command { + BCE_CMD_REGISTER_MEMORY_QUEUE = 0x20, + BCE_CMD_UNREGISTER_MEMORY_QUEUE = 0x30, + BCE_CMD_FLUSH_MEMORY_QUEUE = 0x40, + BCE_CMD_SET_MEMORY_QUEUE_PROPERTY = 0x50 +}; +struct bce_cmdq_simple_memory_queue_cmd { + u16 cmd; // bce_cmdq_command + u16 flags; + u16 qid; +}; +struct bce_cmdq_register_memory_queue_cmd { + u16 cmd; // bce_cmdq_command + u16 flags; + u16 qid; + u16 _pad; + u16 el_count; + u16 vector_or_cq; + u16 _pad2; + u16 name_len; + char name[0x20]; + u64 addr; + u64 length; +}; + +static __always_inline void *bce_sq_element(struct bce_queue_sq *q, int i) { + return (void *) ((u8 *) q->data + q->el_size * i); +} +static __always_inline void *bce_cq_element(struct bce_queue_cq *q, int i) { + return (void *) ((struct bce_qe_completion *) q->data + i); +} + +static __always_inline struct bce_sq_completion_data *bce_next_completion(struct bce_queue_sq *sq) { + struct bce_sq_completion_data *res; + rmb(); + if (sq->completion_cidx == sq->completion_tail) + return NULL; + res = &sq->completion_data[sq->completion_cidx]; + sq->completion_cidx = (sq->completion_cidx + 1) % sq->el_count; + return res; +} + +struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count); +void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg); +void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq); +void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq); + +struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count, + bce_sq_completion compl, void *userdata); +void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg); +void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq); +int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout); +void bce_cancel_submission_reservation(struct bce_queue_sq *sq); +void *bce_next_submission(struct bce_queue_sq *sq); +void bce_submit_to_device(struct bce_queue_sq *sq); +void bce_notify_submission_complete(struct bce_queue_sq *sq); + +void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size); + +struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count); +void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq); + +u32 
bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout);
+u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
+u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
+
+
+/* User API - Creates and registers the queue */
+
+struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count);
+struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
+        int direction, bce_sq_completion compl, void *userdata);
+void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
+void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
+
+#endif //BCE_QUEUE_H
diff --git a/drivers/staging/apple-bce/queue_dma.c b/drivers/staging/apple-bce/queue_dma.c
new file mode 100644
index 000000000000..b236613285c0
--- /dev/null
+++ b/drivers/staging/apple-bce/queue_dma.c
@@ -0,0 +1,220 @@
+#include "queue_dma.h"
+#include <linux/vmalloc.h>
+#include <linux/dma-mapping.h>
+#include "queue.h"
+
+static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len);
+static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
+        struct device *dev, struct scatterlist *pages, int pagen);
+static void bce_unmap_segment_list(struct device *dev, struct bce_segment_list_element_hostinfo *list);
+
+int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
+        enum dma_data_direction dir)
+{
+    int cnt;
+
+    buf->direction = dir;
+    buf->scatterlist = scatterlist;
+    buf->seglist_hostinfo = NULL;
+
+    cnt = dma_map_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
+    if (cnt != buf->scatterlist.nents) {
+        pr_err("apple-bce: DMA scatter list mapping returned an unexpected count: %i\n", cnt);
+        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
+        return -EIO;
+    }
+    if (cnt == 1)
+        return 0;
+
+    buf->seglist_hostinfo = bce_map_segment_list(dev, buf->scatterlist.sgl, buf->scatterlist.nents);
+    if (!buf->seglist_hostinfo) {
+        pr_err("apple-bce: Creating segment list failed\n");
+        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
+        return -EIO;
+    }
+    return 0;
+}
+
+int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
+        enum dma_data_direction dir)
+{
+    int status;
+    struct sg_table scatterlist;
+    if ((status = bce_alloc_scatterlist_from_vm(&scatterlist, data, len)))
+        return status;
+    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
+        sg_free_table(&scatterlist);
+        return status;
+    }
+    return 0;
+}
+
+int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
+        enum dma_data_direction dir)
+{
+    /* Kernel memory is contiguous, which is great for us. */
+    int status;
+    struct sg_table scatterlist;
+    if ((status = sg_alloc_table(&scatterlist, 1, GFP_KERNEL)))
+        return status; /* sg_alloc_table cleans up after itself on failure */
+    sg_set_buf(scatterlist.sgl, data, (uint) len);
+    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
+        sg_free_table(&scatterlist);
+        return status;
+    }
+    return 0;
+}
+
+void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf)
+{
+    dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction);
+    bce_unmap_segment_list(dev, buf->seglist_hostinfo);
+}
+
+
+static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len)
+{
+    int status, i;
+    struct page **pages;
+    size_t off, start_page, end_page, page_count;
+    off = (size_t) data % PAGE_SIZE;
+    start_page = (size_t) data / PAGE_SIZE;
+    end_page = ((size_t) data + len - 1) / PAGE_SIZE;
+    page_count = end_page - start_page + 1;
+
+    if (page_count > PAGE_SIZE / sizeof(struct page *))
+        pages = vmalloc(page_count * sizeof(struct page *));
+    else
+        pages = kmalloc(page_count * sizeof(struct page *), GFP_KERNEL);
+    if (!pages)
+        return -ENOMEM;
+
+    for (i = 0; i < page_count; i++)
+        pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE));
+
+    if ((status = sg_alloc_table_from_pages(tbl, pages, page_count, (unsigned int) off, len, GFP_KERNEL))) {
+        sg_free_table(tbl);
+    }
+
+    if (page_count > PAGE_SIZE / sizeof(struct page *))
+        vfree(pages);
+    else
+        kfree(pages);
+    return status;
+}
+
+#define BCE_ELEMENTS_PER_PAGE ((PAGE_SIZE - sizeof(struct bce_segment_list_header)) \
+        / sizeof(struct bce_segment_list_element))
+#define BCE_ELEMENTS_PER_ADDITIONAL_PAGE (PAGE_SIZE / sizeof(struct bce_segment_list_element))
+
+static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
+        struct device *dev, struct scatterlist *pages, int pagen)
+{
+    size_t ptr, pptr = 0;
+    struct bce_segment_list_header theader; /* a temp header, to store the initial seg */
+    struct bce_segment_list_header *header;
+    struct bce_segment_list_element *el, *el_end;
+    struct bce_segment_list_element_hostinfo *out, *pout, *out_root;
+    struct scatterlist *sg;
+    int i;
+    header = &theader;
+    out = out_root = NULL;
+    el = el_end = NULL;
+    for_each_sg(pages, sg, pagen, i) {
+        if (el >= el_end) {
+            /* allocate a new page, this will be also done for the first element */
+            ptr = __get_free_page(GFP_KERNEL);
+            if (pptr && ptr == pptr + PAGE_SIZE) {
+                out->page_count++;
+                header->element_count += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
+                el_end += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
+            } else {
+                header = (void *) ptr;
+                header->element_count = BCE_ELEMENTS_PER_PAGE;
+                header->data_size = 0;
+                header->next_segl_addr = 0;
+                header->next_segl_length = 0;
+                el = (void *) (header + 1);
+                el_end = el + BCE_ELEMENTS_PER_PAGE;
+
+                if (out) {
+                    out->next = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
+                    out = out->next;
+                } else {
+                    out_root = out = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
+                }
+                out->page_start = (void *) ptr;
+                out->page_count = 1;
+                out->dma_start = DMA_MAPPING_ERROR;
+                out->next = NULL;
+            }
+            pptr = ptr;
+        }
+        el->addr = sg->dma_address;
+        el->length = sg->length;
+        header->data_size += el->length;
+        el++; /* advance to the next element slot; the el >= el_end check above depends on this */
+    }
+
+    /* DMA map */
+    out = out_root;
+    pout = NULL;
+    while (out) {
+        out->dma_start = dma_map_single(dev, out->page_start, out->page_count * PAGE_SIZE, DMA_TO_DEVICE);
+        if (dma_mapping_error(dev, out->dma_start))
+            goto error;
+        if (pout) {
+            header = pout->page_start;
+            header->next_segl_addr = out->dma_start;
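+            /* chain the blocks: the previous block's header now carries the
+             * DMA address and length of this block, so the device can walk
+             * the whole segment list starting from the first block */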
+            header->next_segl_length = out->page_count * PAGE_SIZE;
+        }
+        pout = out;
+        out = out->next;
+    }
+    return out_root;
+
+error:
+    bce_unmap_segment_list(dev, out_root);
+    return NULL;
+}
+
+static void bce_unmap_segment_list(struct device *dev, struct bce_segment_list_element_hostinfo *list)
+{
+    struct bce_segment_list_element_hostinfo *next;
+    while (list) {
+        if (list->dma_start != DMA_MAPPING_ERROR)
+            dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE);
+        next = list->next;
+        kfree(list);
+        list = next;
+    }
+}
+
+int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length)
+{
+    struct bce_segment_list_element_hostinfo *seg;
+    struct bce_segment_list_header *seg_header;
+
+    seg = buf->seglist_hostinfo;
+    if (!seg) {
+        element->addr = buf->scatterlist.sgl->dma_address + offset;
+        element->length = length;
+        element->segl_addr = 0;
+        element->segl_length = 0;
+        return 0;
+    }
+
+    while (seg) {
+        seg_header = seg->page_start;
+        if (offset <= seg_header->data_size)
+            break;
+        offset -= seg_header->data_size;
+        seg = seg->next;
+    }
+    if (!seg)
+        return -EINVAL;
+    element->addr = offset;
+    element->length = buf->scatterlist.sgl->dma_length;
+    element->segl_addr = seg->dma_start;
+    element->segl_length = seg->page_count * PAGE_SIZE;
+    return 0;
+}
diff --git a/drivers/staging/apple-bce/queue_dma.h b/drivers/staging/apple-bce/queue_dma.h
new file mode 100644
index 000000000000..f8a57e50e7a3
--- /dev/null
+++ b/drivers/staging/apple-bce/queue_dma.h
@@ -0,0 +1,50 @@
+#ifndef BCE_QUEUE_DMA_H
+#define BCE_QUEUE_DMA_H
+
+#include <linux/dma-mapping.h>
+
+struct bce_qe_submission;
+
+struct bce_segment_list_header {
+    u64 element_count;
+    u64 data_size;
+
+    u64 next_segl_addr;
+    u64 next_segl_length;
+};
+struct bce_segment_list_element {
+    u64 addr;
+    u64 length;
+};
+
+struct bce_segment_list_element_hostinfo {
+    struct bce_segment_list_element_hostinfo *next;
+    void *page_start;
+    size_t page_count;
+    dma_addr_t dma_start;
+};
+
+
+struct bce_dma_buffer {
+    enum dma_data_direction direction;
+    struct sg_table scatterlist;
+    struct bce_segment_list_element_hostinfo *seglist_hostinfo;
+};
+
+/* NOTE: Takes ownership of the sg_table if it succeeds. Ownership is not transferred on failure.
*/ +int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist, + enum dma_data_direction dir); + +/* Creates a buffer from virtual memory (vmalloc) */ +int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, + enum dma_data_direction dir); + +/* Creates a buffer from kernel memory (kmalloc) */ +int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, + enum dma_data_direction dir); + +void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf); + +int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length); + +#endif //BCE_QUEUE_DMA_H diff --git a/drivers/staging/apple-bce/vhci/command.h b/drivers/staging/apple-bce/vhci/command.h new file mode 100644 index 000000000000..26619e0bccfa --- /dev/null +++ b/drivers/staging/apple-bce/vhci/command.h @@ -0,0 +1,204 @@ +#ifndef BCE_VHCI_COMMAND_H +#define BCE_VHCI_COMMAND_H + +#include "queue.h" +#include +#include + +#define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000) +#define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000) + +#define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2 +#define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2) + +typedef u8 bce_vhci_port_t; +typedef u8 bce_vhci_device_t; + +enum bce_vhci_command { + BCE_VHCI_CMD_CONTROLLER_ENABLE = 1, + BCE_VHCI_CMD_CONTROLLER_DISABLE = 2, + BCE_VHCI_CMD_CONTROLLER_START = 3, + BCE_VHCI_CMD_CONTROLLER_PAUSE = 4, + + BCE_VHCI_CMD_PORT_POWER_ON = 0x10, + BCE_VHCI_CMD_PORT_POWER_OFF = 0x11, + BCE_VHCI_CMD_PORT_RESUME = 0x12, + BCE_VHCI_CMD_PORT_SUSPEND = 0x13, + BCE_VHCI_CMD_PORT_RESET = 0x14, + BCE_VHCI_CMD_PORT_DISABLE = 0x15, + BCE_VHCI_CMD_PORT_STATUS = 0x16, + + BCE_VHCI_CMD_DEVICE_CREATE = 0x30, + BCE_VHCI_CMD_DEVICE_DESTROY = 0x31, + + BCE_VHCI_CMD_ENDPOINT_CREATE = 0x40, + BCE_VHCI_CMD_ENDPOINT_DESTROY = 0x41, + BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42, + BCE_VHCI_CMD_ENDPOINT_RESET = 0x44, + + /* Device to host only */ + BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43, + BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000, + BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS = 0x1005 +}; + +enum bce_vhci_endpoint_state { + BCE_VHCI_ENDPOINT_ACTIVE = 0, + BCE_VHCI_ENDPOINT_PAUSED = 1, + BCE_VHCI_ENDPOINT_STALLED = 2 +}; + +static inline int bce_vhci_cmd_controller_enable(struct bce_vhci_command_queue *q, u8 busNum, u16 *portMask) +{ + int status; + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_CONTROLLER_ENABLE; + cmd.param1 = 0x7100u | busNum; + status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); + if (!status) + *portMask = (u16) res.param2; + return status; +} +static inline int bce_vhci_cmd_controller_disable(struct bce_vhci_command_queue *q) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_CONTROLLER_DISABLE; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); +} +static inline int bce_vhci_cmd_controller_start(struct bce_vhci_command_queue *q) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_CONTROLLER_START; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); +} +static inline int bce_vhci_cmd_controller_pause(struct bce_vhci_command_queue *q) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); +} + +static inline int 
bce_vhci_cmd_port_power_on(struct bce_vhci_command_queue *q, bce_vhci_port_t port) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_POWER_ON; + cmd.param1 = port; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} +static inline int bce_vhci_cmd_port_power_off(struct bce_vhci_command_queue *q, bce_vhci_port_t port) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_POWER_OFF; + cmd.param1 = port; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} +static inline int bce_vhci_cmd_port_resume(struct bce_vhci_command_queue *q, bce_vhci_port_t port) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_RESUME; + cmd.param1 = port; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); +} +static inline int bce_vhci_cmd_port_suspend(struct bce_vhci_command_queue *q, bce_vhci_port_t port) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_SUSPEND; + cmd.param1 = port; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); +} +static inline int bce_vhci_cmd_port_reset(struct bce_vhci_command_queue *q, bce_vhci_port_t port, u32 timeout) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_RESET; + cmd.param1 = port; + cmd.param2 = timeout; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} +static inline int bce_vhci_cmd_port_disable(struct bce_vhci_command_queue *q, bce_vhci_port_t port) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_DISABLE; + cmd.param1 = port; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} +static inline int bce_vhci_cmd_port_status(struct bce_vhci_command_queue *q, bce_vhci_port_t port, + u32 clearFlags, u32 *resStatus) +{ + int status; + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_PORT_STATUS; + cmd.param1 = port; + cmd.param2 = clearFlags & 0x560000; + status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); + if (status >= 0) + *resStatus = (u32) res.param2; + return status; +} + +static inline int bce_vhci_cmd_device_create(struct bce_vhci_command_queue *q, bce_vhci_port_t port, + bce_vhci_device_t *dev) +{ + int status; + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_DEVICE_CREATE; + cmd.param1 = port; + status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); + if (!status) + *dev = (bce_vhci_device_t) res.param2; + return status; +} +static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY; + cmd.param1 = dev; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); +} + +static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, + struct usb_endpoint_descriptor *desc) +{ + struct bce_vhci_message cmd, res; + int endpoint_type = usb_endpoint_type(desc); + int maxp = usb_endpoint_maxp(desc); + int maxp_burst = usb_endpoint_maxp_mult(desc) * maxp; + u8 max_active_requests_pow2 = 0; + cmd.cmd = BCE_VHCI_CMD_ENDPOINT_CREATE; + cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8); + if (endpoint_type == USB_ENDPOINT_XFER_BULK) + max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2; + cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) 
maxp_burst << 32); + if (endpoint_type == USB_ENDPOINT_XFER_INT) + cmd.param2 |= (desc->bInterval - 1) << 8; + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} +static inline int bce_vhci_cmd_endpoint_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_ENDPOINT_DESTROY; + cmd.param1 = dev | (endpoint << 8); + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} +static inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint, + enum bce_vhci_endpoint_state newState, enum bce_vhci_endpoint_state *retState) +{ + int status; + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_ENDPOINT_SET_STATE; + cmd.param1 = dev | (endpoint << 8); + cmd.param2 = (u64) newState; + status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); + if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER) + *retState = (enum bce_vhci_endpoint_state) res.param2; + return status; +} +static inline int bce_vhci_cmd_endpoint_reset(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint) +{ + struct bce_vhci_message cmd, res; + cmd.cmd = BCE_VHCI_CMD_ENDPOINT_RESET; + cmd.param1 = dev | (endpoint << 8); + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); +} + + +#endif //BCE_VHCI_COMMAND_H diff --git a/drivers/staging/apple-bce/vhci/queue.c b/drivers/staging/apple-bce/vhci/queue.c new file mode 100644 index 000000000000..7b0b5027157b --- /dev/null +++ b/drivers/staging/apple-bce/vhci/queue.c @@ -0,0 +1,268 @@ +#include "queue.h" +#include "vhci.h" +#include "../apple_bce.h" + + +static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq); + +int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name) +{ + int status; + ret->cq = bce_create_cq(vhci->dev, VHCI_EVENT_QUEUE_EL_COUNT); + if (!ret->cq) + return -EINVAL; + ret->sq = bce_create_sq(vhci->dev, ret->cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_TO_DEVICE, + bce_vhci_message_queue_completion, ret); + if (!ret->sq) { + status = -EINVAL; + goto fail_cq; + } + ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, + &ret->dma_addr, GFP_KERNEL); + if (!ret->data) { + status = -EINVAL; + goto fail_sq; + } + return 0; + +fail_sq: + bce_destroy_sq(vhci->dev, ret->sq); + ret->sq = NULL; +fail_cq: + bce_destroy_cq(vhci->dev, ret->cq); + ret->cq = NULL; + return status; +} + +void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q) +{ + if (!q->cq) + return; + dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, + q->data, q->dma_addr); + bce_destroy_sq(vhci->dev, q->sq); + bce_destroy_cq(vhci->dev, q->cq); +} + +void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req) +{ + int sidx; + struct bce_qe_submission *s; + sidx = q->sq->tail; + s = bce_next_submission(q->sq); + pr_debug("bce-vhci: Send message: %x s=%x p1=%x p2=%llx\n", req->cmd, req->status, req->param1, req->param2); + q->data[sidx] = *req; + bce_set_submission_single(s, q->dma_addr + sizeof(struct bce_vhci_message) * sidx, + sizeof(struct bce_vhci_message)); + bce_submit_to_device(q->sq); +} + +static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq) +{ + while 
(bce_next_completion(sq)) + bce_notify_submission_complete(sq); +} + + + +static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq); + +int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, + bce_sq_completion compl) +{ + ret->vhci = vhci; + + ret->sq = bce_create_sq(vhci->dev, vhci->ev_cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_FROM_DEVICE, compl, ret); + if (!ret->sq) + return -EINVAL; + ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, + &ret->dma_addr, GFP_KERNEL); + if (!ret->data) { + bce_destroy_sq(vhci->dev, ret->sq); + ret->sq = NULL; + return -EINVAL; + } + + init_completion(&ret->queue_empty_completion); + bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT); + return 0; +} + +int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, + bce_vhci_event_queue_callback cb) +{ + ret->cb = cb; + return __bce_vhci_event_queue_create(vhci, ret, name, bce_vhci_event_queue_completion); +} + +void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q) +{ + if (!q->sq) + return; + dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, + q->data, q->dma_addr); + bce_destroy_sq(vhci->dev, q->sq); +} + +static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq) +{ + struct bce_sq_completion_data *cd; + struct bce_vhci_event_queue *ev = sq->userdata; + struct bce_vhci_message *msg; + size_t cnt = 0; + + while ((cd = bce_next_completion(sq))) { + if (cd->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */ + bce_notify_submission_complete(sq); + continue; + } + msg = &ev->data[sq->head]; + pr_debug("bce-vhci: Got event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2); + ev->cb(ev, msg); + + bce_notify_submission_complete(sq); + ++cnt; + } + bce_vhci_event_queue_submit_pending(ev, cnt); + if (atomic_read(&sq->available_commands) == sq->el_count - 1) + complete(&ev->queue_empty_completion); +} + +void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count) +{ + int idx; + struct bce_qe_submission *s; + while (count--) { + if (bce_reserve_submission(q->sq, NULL)) { + pr_err("bce-vhci: Failed to reserve an event queue submission\n"); + break; + } + idx = q->sq->tail; + s = bce_next_submission(q->sq); + bce_set_submission_single(s, + q->dma_addr + idx * sizeof(struct bce_vhci_message), sizeof(struct bce_vhci_message)); + } + bce_submit_to_device(q->sq); +} + +void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q) +{ + unsigned long timeout; + reinit_completion(&q->queue_empty_completion); + if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid)) + pr_warn("bce-vhci: failed to flush event queue\n"); + timeout = msecs_to_jiffies(5000); + while (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { + timeout = wait_for_completion_timeout(&q->queue_empty_completion, timeout); + if (timeout == 0) { + pr_err("bce-vhci: waiting for queue to be flushed timed out\n"); + break; + } + } +} + +void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q) +{ + if (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { + pr_err("bce-vhci: resume of a queue with pending submissions\n"); + return; + } + bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT); +} + +void bce_vhci_command_queue_create(struct bce_vhci_command_queue 
*ret, struct bce_vhci_message_queue *mq)
+{
+    ret->mq = mq;
+    ret->completion.result = NULL;
+    init_completion(&ret->completion.completion);
+    spin_lock_init(&ret->completion_lock);
+    mutex_init(&ret->mutex);
+}
+
+void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq)
+{
+    spin_lock(&cq->completion_lock);
+    if (cq->completion.result) {
+        memset(cq->completion.result, 0, sizeof(struct bce_vhci_message));
+        cq->completion.result->status = BCE_VHCI_ABORT;
+        complete(&cq->completion.completion);
+        cq->completion.result = NULL;
+    }
+    spin_unlock(&cq->completion_lock);
+    mutex_lock(&cq->mutex);
+    mutex_unlock(&cq->mutex);
+    mutex_destroy(&cq->mutex);
+}
+
+void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg)
+{
+    struct bce_vhci_command_queue_completion *c = &cq->completion;
+
+    spin_lock(&cq->completion_lock);
+    if (c->result) {
+        *c->result = *msg;
+        complete(&c->completion);
+        c->result = NULL;
+    }
+    spin_unlock(&cq->completion_lock);
+}
+
+static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
+        struct bce_vhci_message *res, unsigned long timeout)
+{
+    int status;
+    struct bce_vhci_command_queue_completion *c;
+    struct bce_vhci_message creq;
+    c = &cq->completion;
+
+    if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
+        return status;
+
+    spin_lock(&cq->completion_lock);
+    c->result = res;
+    reinit_completion(&c->completion);
+    spin_unlock(&cq->completion_lock);
+
+    bce_vhci_message_queue_write(cq->mq, req);
+
+    if (!wait_for_completion_timeout(&c->completion, timeout)) {
+        /* we ran out of time, send cancellation */
+        pr_debug("bce-vhci: command timed out req=%x\n", req->cmd);
+        if ((status = bce_reserve_submission(cq->mq->sq, &timeout))) {
+            /* don't leave a dangling pointer to the caller's result buffer installed */
+            spin_lock(&cq->completion_lock);
+            c->result = NULL;
+            spin_unlock(&cq->completion_lock);
+            return status;
+        }
+
+        creq = *req;
+        creq.cmd |= 0x4000;
+        bce_vhci_message_queue_write(cq->mq, &creq);
+
+        if (!wait_for_completion_timeout(&c->completion, msecs_to_jiffies(1000))) {
+            pr_err("bce-vhci: Possible desync, cmd cancel timed out\n");
+
+            spin_lock(&cq->completion_lock);
+            c->result = NULL;
+            spin_unlock(&cq->completion_lock);
+            return -ETIMEDOUT;
+        }
+        if ((res->cmd & ~0x8000) == creq.cmd)
+            return -ETIMEDOUT;
+        /* reply for the previous command most likely arrived */
+    }
+
+    if ((res->cmd & ~0x8000) != req->cmd) {
+        pr_err("bce-vhci: Possible desync, cmd reply mismatch req=%x, res=%x\n", req->cmd, res->cmd);
+        return -EIO;
+    }
+    if (res->status == BCE_VHCI_SUCCESS)
+        return 0;
+    return res->status;
+}
+
+int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
+        struct bce_vhci_message *res, unsigned long timeout)
+{
+    int status;
+    mutex_lock(&cq->mutex);
+    status = __bce_vhci_command_queue_execute(cq, req, res, timeout);
+    mutex_unlock(&cq->mutex);
+    return status;
+}
diff --git a/drivers/staging/apple-bce/vhci/queue.h b/drivers/staging/apple-bce/vhci/queue.h
new file mode 100644
index 000000000000..adb705b6ba1d
--- /dev/null
+++ b/drivers/staging/apple-bce/vhci/queue.h
@@ -0,0 +1,76 @@
+#ifndef BCE_VHCI_QUEUE_H
+#define BCE_VHCI_QUEUE_H
+
+#include <linux/completion.h>
+#include "../queue.h"
+
+#define VHCI_EVENT_QUEUE_EL_COUNT 256
+#define VHCI_EVENT_PENDING_COUNT 32
+
+struct bce_vhci;
+struct bce_vhci_event_queue;
+
+enum bce_vhci_message_status {
+    BCE_VHCI_SUCCESS = 1,
+    BCE_VHCI_ERROR = 2,
+    BCE_VHCI_USB_PIPE_STALL = 3,
+    BCE_VHCI_ABORT = 4,
+    BCE_VHCI_BAD_ARGUMENT = 5,
+    BCE_VHCI_OVERRUN = 6,
+    BCE_VHCI_INTERNAL_ERROR = 7,
+    BCE_VHCI_NO_POWER = 8,
+    BCE_VHCI_UNSUPPORTED = 9
+};
+struct
bce_vhci_message { + u16 cmd; + u16 status; // bce_vhci_message_status + u32 param1; + u64 param2; +}; + +struct bce_vhci_message_queue { + struct bce_queue_cq *cq; + struct bce_queue_sq *sq; + struct bce_vhci_message *data; + dma_addr_t dma_addr; +}; +typedef void (*bce_vhci_event_queue_callback)(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); +struct bce_vhci_event_queue { + struct bce_vhci *vhci; + struct bce_queue_sq *sq; + struct bce_vhci_message *data; + dma_addr_t dma_addr; + bce_vhci_event_queue_callback cb; + struct completion queue_empty_completion; +}; +struct bce_vhci_command_queue_completion { + struct bce_vhci_message *result; + struct completion completion; +}; +struct bce_vhci_command_queue { + struct bce_vhci_message_queue *mq; + struct bce_vhci_command_queue_completion completion; + struct spinlock completion_lock; + struct mutex mutex; +}; + +int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name); +void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q); +void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req); + +int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, + bce_sq_completion compl); +int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, + bce_vhci_event_queue_callback cb); +void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q); +void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count); +void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q); +void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q); + +void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq); +void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq); +int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req, + struct bce_vhci_message *res, unsigned long timeout); +void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg); + +#endif //BCE_VHCI_QUEUE_H diff --git a/drivers/staging/apple-bce/vhci/transfer.c b/drivers/staging/apple-bce/vhci/transfer.c new file mode 100644 index 000000000000..d77220707492 --- /dev/null +++ b/drivers/staging/apple-bce/vhci/transfer.c @@ -0,0 +1,673 @@ +#include "transfer.h" +#include "../queue.h" +#include "vhci.h" +#include "../apple_bce.h" +#include + +static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq); +static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q); +static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q); + +static int bce_vhci_urb_init(struct bce_vhci_urb *vurb); +static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg); +static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c); + +static void bce_vhci_transfer_queue_reset_w(struct work_struct *work); + +void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q, + struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir) +{ + char name[0x21]; + INIT_LIST_HEAD(&q->evq); + INIT_LIST_HEAD(&q->giveback_urb_list); + spin_lock_init(&q->urb_lock); + mutex_init(&q->pause_lock); + q->vhci = vhci; + q->endp 
= endp;
+    q->dev_addr = dev_addr;
+    q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F);
+    q->state = BCE_VHCI_ENDPOINT_ACTIVE;
+    q->active = true;
+    q->stalled = false;
+    q->max_active_requests = 1;
+    if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK)
+        q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS;
+    q->remaining_active_requests = q->max_active_requests;
+    q->cq = bce_create_cq(vhci->dev, 0x100);
+    INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w);
+    q->sq_in = NULL;
+    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
+        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc));
+        q->sq_in = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_FROM_DEVICE,
+                bce_vhci_transfer_queue_completion, q);
+    }
+    q->sq_out = NULL;
+    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
+        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, usb_endpoint_num(&endp->desc));
+        q->sq_out = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_TO_DEVICE,
+                bce_vhci_transfer_queue_completion, q);
+    }
+}
+
+void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q)
+{
+    bce_vhci_transfer_queue_giveback(q);
+    bce_vhci_transfer_queue_remove_pending(q);
+    if (q->sq_in)
+        bce_destroy_sq(vhci->dev, q->sq_in);
+    if (q->sq_out)
+        bce_destroy_sq(vhci->dev, q->sq_out);
+    bce_destroy_cq(vhci->dev, q->cq);
+}
+
+static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer_queue *q)
+{
+    return q->remaining_active_requests > 0;
+}
+
+static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
+{
+    struct bce_vhci_list_message *lm;
+    /* called with q->urb_lock held and interrupts off, so the allocation must not sleep */
+    lm = kmalloc(sizeof(struct bce_vhci_list_message), GFP_ATOMIC);
+    if (!lm) {
+        pr_err("bce-vhci: [%02x] Dropping a deferred event (allocation failed)\n", q->endp_addr);
+        return;
+    }
+    INIT_LIST_HEAD(&lm->list);
+    lm->msg = *msg;
+    list_add_tail(&lm->list, &q->evq);
+}
+
+static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q)
+{
+    unsigned long flags;
+    struct urb *urb;
+    spin_lock_irqsave(&q->urb_lock, flags);
+    while (!list_empty(&q->giveback_urb_list)) {
+        urb = list_first_entry(&q->giveback_urb_list, struct urb, urb_list);
+        list_del(&urb->urb_list);
+
+        spin_unlock_irqrestore(&q->urb_lock, flags);
+        usb_hcd_giveback_urb(q->vhci->hcd, urb, urb->status);
+        spin_lock_irqsave(&q->urb_lock, flags);
+    }
+    spin_unlock_irqrestore(&q->urb_lock, flags);
+}
+
+static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q);
+
+static void bce_vhci_transfer_queue_deliver_pending(struct bce_vhci_transfer_queue *q)
+{
+    struct urb *urb;
+    struct bce_vhci_list_message *lm;
+
+    while (!list_empty(&q->endp->urb_list) && !list_empty(&q->evq)) {
+        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
+
+        lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
+        if (bce_vhci_urb_update(urb->hcpriv, &lm->msg) == -EAGAIN)
+            break;
+        list_del(&lm->list);
+        kfree(lm);
+    }
+
+    /* some of the URBs could have been completed, so initialize more URBs if possible */
+    bce_vhci_transfer_queue_init_pending_urbs(q);
+}
+
+static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q)
+{
+    unsigned long flags;
+    struct bce_vhci_list_message *lm;
+    spin_lock_irqsave(&q->urb_lock, flags);
+    while (!list_empty(&q->evq)) {
+        lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
+        list_del(&lm->list);
+        kfree(lm);
+    }
+    spin_unlock_irqrestore(&q->urb_lock, flags);
+}
+
+void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct
bce_vhci_message *msg) +{ + unsigned long flags; + struct bce_vhci_urb *turb; + struct urb *urb; + spin_lock_irqsave(&q->urb_lock, flags); + bce_vhci_transfer_queue_deliver_pending(q); + + if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && + (!list_empty(&q->evq) || list_empty(&q->endp->urb_list))) { + bce_vhci_transfer_queue_defer_event(q, msg); + goto complete; + } + if (list_empty(&q->endp->urb_list)) { + pr_err("bce-vhci: [%02x] Unexpected transfer queue event\n", q->endp_addr); + goto complete; + } + urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list); + turb = urb->hcpriv; + if (bce_vhci_urb_update(turb, msg) == -EAGAIN) { + bce_vhci_transfer_queue_defer_event(q, msg); + } else { + bce_vhci_transfer_queue_init_pending_urbs(q); + } + +complete: + spin_unlock_irqrestore(&q->urb_lock, flags); + bce_vhci_transfer_queue_giveback(q); +} + +static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq) +{ + unsigned long flags; + struct bce_sq_completion_data *c; + struct urb *urb; + struct bce_vhci_transfer_queue *q = sq->userdata; + spin_lock_irqsave(&q->urb_lock, flags); + while ((c = bce_next_completion(sq))) { + if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */ + pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr); + bce_notify_submission_complete(sq); + continue; + } + if (list_empty(&q->endp->urb_list)) { + pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr); + continue; + } + pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr); + urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list); + bce_vhci_urb_transfer_completion(urb->hcpriv, c); + bce_notify_submission_complete(sq); + } + bce_vhci_transfer_queue_deliver_pending(q); + spin_unlock_irqrestore(&q->urb_lock, flags); + bce_vhci_transfer_queue_giveback(q); +} + +int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q) +{ + unsigned long flags; + int status; + u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F); + spin_lock_irqsave(&q->urb_lock, flags); + q->active = false; + spin_unlock_irqrestore(&q->urb_lock, flags); + if (q->sq_out) { + pr_err("bce-vhci: Not implemented: wait for pending output requests\n"); + } + bce_vhci_transfer_queue_remove_pending(q); + if ((status = bce_vhci_cmd_endpoint_set_state( + &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state))) + return status; + if (q->state != BCE_VHCI_ENDPOINT_PAUSED) + return -EINVAL; + if (q->sq_in) + bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid); + if (q->sq_out) + bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid); + return 0; +} + +static void bce_vhci_urb_resume(struct bce_vhci_urb *urb); + +int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q) +{ + unsigned long flags; + int status; + struct urb *urb, *urbt; + struct bce_vhci_urb *vurb; + u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F); + if ((status = bce_vhci_cmd_endpoint_set_state( + &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state))) + return status; + if (q->state != BCE_VHCI_ENDPOINT_ACTIVE) + return -EINVAL; + spin_lock_irqsave(&q->urb_lock, flags); + q->active = true; + list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) { + vurb = urb->hcpriv; + if (vurb->state == BCE_VHCI_URB_INIT_PENDING) { + if (!bce_vhci_transfer_queue_can_init_urb(q)) + break; + bce_vhci_urb_init(vurb); + } else { + bce_vhci_urb_resume(vurb); + } + } + 
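+    /* events that arrived while the queue was paused were deferred to
+     * q->evq; match them against the re-initialized URBs now */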
bce_vhci_transfer_queue_deliver_pending(q); + spin_unlock_irqrestore(&q->urb_lock, flags); + return 0; +} + +int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src) +{ + int ret = 0; + mutex_lock(&q->pause_lock); + if ((q->paused_by & src) != src) { + if (!q->paused_by) + ret = bce_vhci_transfer_queue_do_pause(q); + if (!ret) + q->paused_by |= src; + } + mutex_unlock(&q->pause_lock); + return ret; +} + +int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src) +{ + int ret = 0; + mutex_lock(&q->pause_lock); + if (q->paused_by & src) { + if (!(q->paused_by & ~src)) + ret = bce_vhci_transfer_queue_do_resume(q); + if (!ret) + q->paused_by &= ~src; + } + mutex_unlock(&q->pause_lock); + return ret; +} + +static void bce_vhci_transfer_queue_reset_w(struct work_struct *work) +{ + unsigned long flags; + struct bce_vhci_transfer_queue *q = container_of(work, struct bce_vhci_transfer_queue, w_reset); + + mutex_lock(&q->pause_lock); + spin_lock_irqsave(&q->urb_lock, flags); + if (!q->stalled) { + spin_unlock_irqrestore(&q->urb_lock, flags); + mutex_unlock(&q->pause_lock); + return; + } + q->active = false; + spin_unlock_irqrestore(&q->urb_lock, flags); + q->paused_by |= BCE_VHCI_PAUSE_INTERNAL_WQ; + bce_vhci_transfer_queue_remove_pending(q); + if (q->sq_in) + bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid); + if (q->sq_out) + bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid); + bce_vhci_cmd_endpoint_reset(&q->vhci->cq, q->dev_addr, (u8) (q->endp->desc.bEndpointAddress & 0x8F)); + spin_lock_irqsave(&q->urb_lock, flags); + q->stalled = false; + spin_unlock_irqrestore(&q->urb_lock, flags); + mutex_unlock(&q->pause_lock); + bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); +} + +void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q) +{ + queue_work(q->vhci->tq_state_wq, &q->w_reset); +} + +static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q) +{ + struct urb *urb, *urbt; + struct bce_vhci_urb *vurb; + list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) { + vurb = urb->hcpriv; + if (!bce_vhci_transfer_queue_can_init_urb(q)) + break; + if (vurb->state == BCE_VHCI_URB_INIT_PENDING) + bce_vhci_urb_init(vurb); + } +} + + + +static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout); + +int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb) +{ + unsigned long flags; + int status = 0; + struct bce_vhci_urb *vurb; + vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL); + if (!vurb) + return -ENOMEM; + urb->hcpriv = vurb; + + vurb->q = q; + vurb->urb = urb; + vurb->dir = usb_urb_dir_in(urb) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; + vurb->is_control = (usb_endpoint_num(&urb->ep->desc) == 0); + + spin_lock_irqsave(&q->urb_lock, flags); + status = usb_hcd_link_urb_to_ep(q->vhci->hcd, urb); + if (status) { + spin_unlock_irqrestore(&q->urb_lock, flags); + urb->hcpriv = NULL; + kfree(vurb); + return status; + } + + if (q->active) { + if (bce_vhci_transfer_queue_can_init_urb(vurb->q)) + status = bce_vhci_urb_init(vurb); + else + vurb->state = BCE_VHCI_URB_INIT_PENDING; + } else { + if (q->stalled) + bce_vhci_transfer_queue_request_reset(q); + vurb->state = BCE_VHCI_URB_INIT_PENDING; + } + if (status) { + usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); + urb->hcpriv = NULL; + kfree(vurb); + } else { + bce_vhci_transfer_queue_deliver_pending(q); + } + spin_unlock_irqrestore(&q->urb_lock, flags); + pr_debug("bce-vhci: [%02x] URB enqueued (dir = %s, size = %i)\n", q->endp_addr, + usb_urb_dir_in(urb) ? "IN" : "OUT", urb->transfer_buffer_length); + return status; +} + +static int bce_vhci_urb_init(struct bce_vhci_urb *vurb) +{ + int status = 0; + + if (vurb->q->remaining_active_requests == 0) { + pr_err("bce-vhci: cannot init request (remaining_active_requests = 0)\n"); + return -EINVAL; + } + + if (vurb->is_control) { + vurb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST; + } else { + status = bce_vhci_urb_data_start(vurb, NULL); + } + + if (!status) { + --vurb->q->remaining_active_requests; + } + return status; +} + +static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status) +{ + struct bce_vhci_transfer_queue *q = urb->q; + struct bce_vhci *vhci = q->vhci; + struct urb *real_urb = urb->urb; + pr_debug("bce-vhci: [%02x] URB complete %i\n", q->endp_addr, status); + usb_hcd_unlink_urb_from_ep(vhci->hcd, real_urb); + real_urb->hcpriv = NULL; + real_urb->status = status; + if (urb->state != BCE_VHCI_URB_INIT_PENDING) + ++urb->q->remaining_active_requests; + kfree(urb); + list_add_tail(&real_urb->urb_list, &q->giveback_urb_list); +} + +int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status) +{ + struct bce_vhci_urb *vurb; + unsigned long flags; + int ret; + enum bce_vhci_urb_state old_state; + + spin_lock_irqsave(&q->urb_lock, flags); + if ((ret = usb_hcd_check_unlink_urb(q->vhci->hcd, urb, status))) { + spin_unlock_irqrestore(&q->urb_lock, flags); + return ret; + } + + vurb = urb->hcpriv; + + old_state = vurb->state; /* save old state to use later because we'll set state as cancelled */ + + if (old_state == BCE_VHCI_URB_CANCELLED) { + spin_unlock_irqrestore(&q->urb_lock, flags); + pr_debug("bce-vhci: URB %p is already cancelled, skipping\n", urb); + return 0; + } + + vurb->state = BCE_VHCI_URB_CANCELLED; + + /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. 
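Otherwise the queue must be paused first so the device stops using the transfer before we unlink it. 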
*/ + if (old_state != BCE_VHCI_URB_INIT_PENDING) { + pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr); + + spin_unlock_irqrestore(&q->urb_lock, flags); + bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ); + spin_lock_irqsave(&q->urb_lock, flags); + + ++q->remaining_active_requests; + } + + usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); + + spin_unlock_irqrestore(&q->urb_lock, flags); + + usb_hcd_giveback_urb(q->vhci->hcd, urb, status); + + if (old_state != BCE_VHCI_URB_INIT_PENDING) + bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); + + kfree(vurb); + + return 0; +} + +static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout) +{ + struct bce_vhci_message msg; + struct bce_qe_submission *s; + u32 tr_len; + int reservation1, reservation2 = -EFAULT; + + pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr, + (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length); + + /* Reserve both a message and a submission, so we don't run into issues later. */ + reservation1 = bce_reserve_submission(urb->q->vhci->msg_asynchronous.sq, timeout); + if (!reservation1) + reservation2 = bce_reserve_submission(urb->q->sq_in, timeout); + if (reservation1 || reservation2) { + pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); + if (!reservation1) + bce_cancel_submission_reservation(urb->q->vhci->msg_asynchronous.sq); + return -ENOMEM; + } + + urb->send_offset = urb->receive_offset; + + tr_len = urb->urb->transfer_buffer_length - urb->send_offset; + + spin_lock(&urb->q->vhci->msg_asynchronous_lock); + msg.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST; + msg.status = 0; + msg.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | urb->q->dev_addr; + msg.param2 = tr_len; + bce_vhci_message_queue_write(&urb->q->vhci->msg_asynchronous, &msg); + spin_unlock(&urb->q->vhci->msg_asynchronous_lock); + + s = bce_next_submission(urb->q->sq_in); + bce_set_submission_single(s, urb->urb->transfer_dma + urb->send_offset, tr_len); + bce_submit_to_device(urb->q->sq_in); + + urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION; + return 0; +} + +static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout) +{ + if (urb->dir == DMA_TO_DEVICE) { + if (urb->urb->transfer_buffer_length > 0) + urb->state = BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST; + else + urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; + return 0; + } else { + return bce_vhci_urb_data_transfer_in(urb, timeout); + } +} + +static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, size_t size) +{ + struct bce_qe_submission *s; + unsigned long timeout = 0; + if (bce_reserve_submission(urb->q->sq_out, &timeout)) { + pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); + return -EPIPE; + } + + pr_debug("bce-vhci: [%02x] DMA to device %llx %lx\n", urb->q->endp_addr, (u64) addr, size); + + s = bce_next_submission(urb->q->sq_out); + bce_set_submission_single(s, addr, size); + bce_submit_to_device(urb->q->sq_out); + return 0; +} + +static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) +{ + u32 tr_len; + int status; + if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST) { + if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) { + tr_len = min(urb->urb->transfer_buffer_length - urb->send_offset, (u32) msg->param2); + if ((status = bce_vhci_urb_send_out_data(urb, urb->urb->transfer_dma + urb->send_offset, tr_len))) + return status; + urb->send_offset += 
tr_len; + urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION; + return 0; + } + } + + /* 0x1000 in out queues aren't really unexpected */ + if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL) + return -EAGAIN; + pr_err("bce-vhci: [%02x] %s URB unexpected message (state = %x, msg: %x %x %x %llx)\n", + urb->q->endp_addr, (urb->is_control ? "Control (data update)" : "Data"), urb->state, + msg->cmd, msg->status, msg->param1, msg->param2); + return -EAGAIN; +} + +static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) +{ + if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { + urb->receive_offset += c->data_size; + if (urb->dir == DMA_FROM_DEVICE || urb->receive_offset >= urb->urb->transfer_buffer_length) { + urb->urb->actual_length = (u32) urb->receive_offset; + urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; + if (!urb->is_control) { + bce_vhci_urb_complete(urb, 0); + return -ENOENT; + } + } + } else { + pr_err("bce-vhci: [%02x] Data URB unexpected completion\n", urb->q->endp_addr); + } + return 0; +} + + +static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb) +{ + struct bce_vhci_transfer_queue *q = urb->q; + if (urb->received_status == 0) + return 0; + if (urb->state == BCE_VHCI_URB_DATA_TRANSFER_COMPLETE || + (urb->received_status != BCE_VHCI_SUCCESS && urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST && + urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION)) { + urb->state = BCE_VHCI_URB_CONTROL_COMPLETE; + if (urb->received_status != BCE_VHCI_SUCCESS) { + pr_err("bce-vhci: [%02x] URB failed: %x\n", urb->q->endp_addr, urb->received_status); + urb->q->active = false; + urb->q->stalled = true; + bce_vhci_urb_complete(urb, -EPIPE); + if (!list_empty(&q->endp->urb_list)) + bce_vhci_transfer_queue_request_reset(q); + return -ENOENT; + } + bce_vhci_urb_complete(urb, 0); + return -ENOENT; + } + return 0; +} + +static int bce_vhci_urb_control_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) +{ + int status; + if (msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { + urb->received_status = msg->status; + return bce_vhci_urb_control_check_status(urb); + } + + if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST) { + if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) { + if (bce_vhci_urb_send_out_data(urb, urb->urb->setup_dma, sizeof(struct usb_ctrlrequest))) { + pr_err("bce-vhci: [%02x] Failed to start URB setup transfer\n", urb->q->endp_addr); + return 0; /* TODO: fail the URB? 
*/ + urb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION; + pr_debug("bce-vhci: [%02x] Sent setup %llx\n", urb->q->endp_addr, urb->urb->setup_dma); + return 0; + } + } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST || + urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { + if ((status = bce_vhci_urb_data_update(urb, msg))) + return status; + return bce_vhci_urb_control_check_status(urb); + } + + /* 0x1000 in out queues aren't really unexpected */ + if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL) + return -EAGAIN; + pr_err("bce-vhci: [%02x] Control URB unexpected message (state = %x, msg: %x %x %x %llx)\n", urb->q->endp_addr, + urb->state, msg->cmd, msg->status, msg->param1, msg->param2); + return -EAGAIN; +} + +static int bce_vhci_urb_control_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) +{ + int status; + unsigned long timeout; + + if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION) { + if (c->data_size != sizeof(struct usb_ctrlrequest)) + pr_err("bce-vhci: [%02x] transfer complete data size mismatch for usb_ctrlrequest (%llx instead of %zx)\n", + urb->q->endp_addr, c->data_size, sizeof(struct usb_ctrlrequest)); + + timeout = 1000; + status = bce_vhci_urb_data_start(urb, &timeout); + if (status) { + bce_vhci_urb_complete(urb, status); + return -ENOENT; + } + return 0; + } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST || + urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { + if ((status = bce_vhci_urb_data_transfer_completion(urb, c))) + return status; + return bce_vhci_urb_control_check_status(urb); + } else { + pr_err("bce-vhci: [%02x] Control URB unexpected completion (state = %x)\n", urb->q->endp_addr, urb->state); + } + return 0; +} + +static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) +{ + if (urb->state == BCE_VHCI_URB_INIT_PENDING) + return -EAGAIN; + if (urb->is_control) + return bce_vhci_urb_control_update(urb, msg); + else + return bce_vhci_urb_data_update(urb, msg); +} + +static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) +{ + if (urb->is_control) + return bce_vhci_urb_control_transfer_completion(urb, c); + else + return bce_vhci_urb_data_transfer_completion(urb, c); +} + +static void bce_vhci_urb_resume(struct bce_vhci_urb *urb) +{ + int status = 0; + if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { + status = bce_vhci_urb_data_transfer_in(urb, NULL); + } + if (status) + bce_vhci_urb_complete(urb, status); +} diff --git a/drivers/staging/apple-bce/vhci/transfer.h b/drivers/staging/apple-bce/vhci/transfer.h new file mode 100644 index 000000000000..b5403d5703ed --- /dev/null +++ b/drivers/staging/apple-bce/vhci/transfer.h @@ -0,0 +1,75 @@ +#ifndef BCEDRIVER_TRANSFER_H +#define BCEDRIVER_TRANSFER_H + +#include +#include "queue.h" +#include "command.h" +#include "../queue.h" + +struct bce_vhci_list_message { + struct list_head list; + struct bce_vhci_message msg; +}; +enum bce_vhci_pause_source { + BCE_VHCI_PAUSE_INTERNAL_WQ = 1, + BCE_VHCI_PAUSE_FIRMWARE = 2, + BCE_VHCI_PAUSE_SUSPEND = 4, + BCE_VHCI_PAUSE_SHUTDOWN = 8 +}; +struct bce_vhci_transfer_queue { + struct bce_vhci *vhci; + struct usb_host_endpoint *endp; + enum bce_vhci_endpoint_state state; + u32 max_active_requests, remaining_active_requests; + bool active, stalled; + u32 paused_by; + bce_vhci_device_t dev_addr; + u8 endp_addr; + struct bce_queue_cq *cq; + struct bce_queue_sq *sq_in; + struct 
bce_queue_sq *sq_out; + struct list_head evq; + struct spinlock urb_lock; + struct mutex pause_lock; + struct list_head giveback_urb_list; + + struct work_struct w_reset; +}; +enum bce_vhci_urb_state { + BCE_VHCI_URB_INIT_PENDING, + + BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST, + BCE_VHCI_URB_WAITING_FOR_COMPLETION, + BCE_VHCI_URB_DATA_TRANSFER_COMPLETE, + + BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST, + BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION, + BCE_VHCI_URB_CONTROL_COMPLETE, + + BCE_VHCI_URB_CANCELLED +}; +struct bce_vhci_urb { + struct urb *urb; + struct bce_vhci_transfer_queue *q; + enum dma_data_direction dir; + bool is_control; + enum bce_vhci_urb_state state; + int received_status; + u32 send_offset; + u32 receive_offset; +}; + +void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q, + struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir); +void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q); +void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg); +int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q); +int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q); +int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src); +int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src); +void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q); + +int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb); +int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status); + +#endif //BCEDRIVER_TRANSFER_H diff --git a/drivers/staging/apple-bce/vhci/vhci.c b/drivers/staging/apple-bce/vhci/vhci.c new file mode 100644 index 000000000000..3394e0aa2eef --- /dev/null +++ b/drivers/staging/apple-bce/vhci/vhci.c @@ -0,0 +1,763 @@ +#include "vhci.h" +#include "../apple_bce.h" +#include "command.h" +#include +#include +#include +#include + +static dev_t bce_vhci_chrdev; +static struct class *bce_vhci_class; +static const struct hc_driver bce_vhci_driver; +static u16 bce_vhci_port_mask = U16_MAX; + +static int bce_vhci_create_event_queues(struct bce_vhci *vhci); +static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci); +static int bce_vhci_create_message_queues(struct bce_vhci *vhci); +static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci); +static void bce_vhci_handle_firmware_events_w(struct work_struct *ws); +static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq); + +int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci) +{ + int status; + + spin_lock_init(&vhci->hcd_spinlock); + + vhci->dev = dev; + + vhci->vdevt = bce_vhci_chrdev; + vhci->vdev = device_create(bce_vhci_class, dev->dev, vhci->vdevt, NULL, "bce-vhci"); + if (IS_ERR_OR_NULL(vhci->vdev)) { + status = PTR_ERR(vhci->vdev); + goto fail_dev; + } + + if ((status = bce_vhci_create_message_queues(vhci))) + goto fail_mq; + if ((status = bce_vhci_create_event_queues(vhci))) + goto fail_eq; + + vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0); + INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w); + + vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci"); + if (!vhci->hcd) { + status = -ENOMEM; + goto fail_hcd; + } + vhci->hcd->self.sysdev = &dev->pci->dev; +#if LINUX_VERSION_CODE < 
KERNEL_VERSION(5,4,0) + vhci->hcd->self.uses_dma = 1; +#endif + *((struct bce_vhci **) vhci->hcd->hcd_priv) = vhci; + vhci->hcd->speed = HCD_USB2; + + if ((status = usb_add_hcd(vhci->hcd, 0, 0))) + goto fail_hcd; + + return 0; + +fail_hcd: + bce_vhci_destroy_event_queues(vhci); +fail_eq: + bce_vhci_destroy_message_queues(vhci); +fail_mq: + device_destroy(bce_vhci_class, vhci->vdevt); +fail_dev: + if (!status) + status = -EINVAL; + return status; +} + +void bce_vhci_destroy(struct bce_vhci *vhci) +{ + usb_remove_hcd(vhci->hcd); + bce_vhci_destroy_event_queues(vhci); + bce_vhci_destroy_message_queues(vhci); + device_destroy(bce_vhci_class, vhci->vdevt); +} + +struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd) +{ + return *((struct bce_vhci **) hcd->hcd_priv); +} + +int bce_vhci_start(struct usb_hcd *hcd) +{ + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + int status; + u16 port_mask = 0; + bce_vhci_port_t port_no = 0; + if ((status = bce_vhci_cmd_controller_enable(&vhci->cq, 1, &port_mask))) + return status; + vhci->port_mask = port_mask; + vhci->port_power_mask = 0; + if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) + return status; + port_mask = vhci->port_mask; + while (port_mask) { + port_no += 1; + port_mask >>= 1; + } + vhci->port_count = port_no; + return 0; +} + +void bce_vhci_stop(struct usb_hcd *hcd) +{ + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + bce_vhci_cmd_controller_disable(&vhci->cq); +} + +static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf) +{ + return 0; +} + +static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout); + +static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength) +{ + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + int status; + struct usb_hub_descriptor *hd; + struct usb_hub_status *hs; + struct usb_port_status *ps; + u32 port_status; + // pr_info("bce-vhci: bce_vhci_hub_control %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength); + if (typeReq == GetHubDescriptor && wLength >= sizeof(struct usb_hub_descriptor)) { + hd = (struct usb_hub_descriptor *) buf; + memset(hd, 0, sizeof(*hd)); + hd->bDescLength = sizeof(struct usb_hub_descriptor); + hd->bDescriptorType = USB_DT_HUB; + hd->bNbrPorts = (u8) vhci->port_count; + hd->wHubCharacteristics = HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_INDV_PORT_OCPM; + hd->bPwrOn2PwrGood = 0; + hd->bHubContrCurrent = 0; + return 0; + } else if (typeReq == GetHubStatus && wLength >= sizeof(struct usb_hub_status)) { + hs = (struct usb_hub_status *) buf; + memset(hs, 0, sizeof(*hs)); + hs->wHubStatus = 0; + hs->wHubChange = 0; + return 0; + } else if (typeReq == GetPortStatus && wLength >= 4 /* usb 2.0 */) { + ps = (struct usb_port_status *) buf; + ps->wPortStatus = 0; + ps->wPortChange = 0; + + if (vhci->port_power_mask & BIT(wIndex)) + ps->wPortStatus |= USB_PORT_STAT_POWER; + + if (!(bce_vhci_port_mask & BIT(wIndex))) + return 0; + + if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status))) + return status; + + if (port_status & 16) + ps->wPortStatus |= USB_PORT_STAT_ENABLE | USB_PORT_STAT_HIGH_SPEED; + if (port_status & 4) + ps->wPortStatus |= USB_PORT_STAT_CONNECTION; + if (port_status & 2) + ps->wPortStatus |= USB_PORT_STAT_OVERCURRENT; + if (port_status & 8) + ps->wPortStatus |= USB_PORT_STAT_RESET; + if (port_status & 0x60) + ps->wPortStatus |= USB_PORT_STAT_SUSPEND; + + if (port_status & 0x40000) + ps->wPortChange |= USB_PORT_STAT_C_CONNECTION; + + pr_debug("bce-vhci: Translated 
status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange); + return 0; + } else if (typeReq == SetPortFeature) { + if (wValue == USB_PORT_FEAT_POWER) { + status = bce_vhci_cmd_port_power_on(&vhci->cq, (u8) wIndex); + /* As far as I am aware, power status is not part of the port status so store it separately */ + if (!status) + vhci->port_power_mask |= BIT(wIndex); + return status; + } + if (wValue == USB_PORT_FEAT_RESET) { + return bce_vhci_reset_device(vhci, wIndex, wValue); + } + if (wValue == USB_PORT_FEAT_SUSPEND) { + /* TODO: Am I supposed to also suspend the endpoints? */ + pr_debug("bce-vhci: Suspending port %i\n", wIndex); + return bce_vhci_cmd_port_suspend(&vhci->cq, (u8) wIndex); + } + } else if (typeReq == ClearPortFeature) { + if (wValue == USB_PORT_FEAT_ENABLE) + return bce_vhci_cmd_port_disable(&vhci->cq, (u8) wIndex); + if (wValue == USB_PORT_FEAT_POWER) { + status = bce_vhci_cmd_port_power_off(&vhci->cq, (u8) wIndex); + if (!status) + vhci->port_power_mask &= ~BIT(wIndex); + return status; + } + if (wValue == USB_PORT_FEAT_C_CONNECTION) + return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status); + if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */ + return 0; + } + if (wValue == USB_PORT_FEAT_SUSPEND) { + pr_debug("bce-vhci: Resuming port %i\n", wIndex); + return bce_vhci_cmd_port_resume(&vhci->cq, (u8) wIndex); + } + } + pr_err("bce-vhci: bce_vhci_hub_control unhandled request: %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength); + dump_stack(); + return -EIO; +} + +static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) +{ + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + struct bce_vhci_device *vdev; + bce_vhci_device_t devid; + pr_info("bce_vhci_enable_device\n"); + + if (vhci->port_to_device[udev->portnum]) + return 0; + + /* We need to address the device early */ + if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid)) + return -EIO; + + pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid); + + vdev = kzalloc(sizeof(struct bce_vhci_device), GFP_KERNEL); + if (!vdev) { + bce_vhci_cmd_device_destroy(&vhci->cq, devid); + return -ENOMEM; + } + vhci->port_to_device[udev->portnum] = devid; + vhci->devices[devid] = vdev; + + bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], &udev->ep0, devid, DMA_BIDIRECTIONAL); + udev->ep0.hcpriv = &vdev->tq[0]; + vdev->tq_mask |= BIT(0); + + bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &udev->ep0.desc); + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6,8,0) +static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) +#else +static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device *udev, unsigned int timeout_ms) //TODO: follow timeout +#endif +{ + /* This is the same as enable_device, but for the old addressing scheme */ + return bce_vhci_enable_device(hcd, udev); +} + +static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev) +{ + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + int i; + bce_vhci_device_t devid; + struct bce_vhci_device *dev; + pr_info("bce_vhci_free_device %i\n", udev->portnum); + if (!vhci->port_to_device[udev->portnum]) + return; + devid = vhci->port_to_device[udev->portnum]; + dev = vhci->devices[devid]; + for (i = 0; i < 32; i++) { + if (dev->tq_mask & BIT(i)) { + bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); + bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); + bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); + } + } + vhci->devices[devid] = NULL; + 
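/* Drop our references before asking the firmware to destroy the device. */ +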
vhci->port_to_device[udev->portnum] = 0; + bce_vhci_cmd_device_destroy(&vhci->cq, devid); + kfree(dev); +} + +static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout) +{ + struct bce_vhci_device *dev = NULL; + bce_vhci_device_t devid; + int i; + int status; + enum dma_data_direction dir; + pr_info("bce_vhci_reset_device %i\n", index); + + devid = vhci->port_to_device[index]; + if (devid) { + dev = vhci->devices[devid]; + + for (i = 0; i < 32; i++) { + if (dev->tq_mask & BIT(i)) { + bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); + bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); + bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); + } + } + vhci->devices[devid] = NULL; + vhci->port_to_device[index] = 0; + bce_vhci_cmd_device_destroy(&vhci->cq, devid); + } + status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout); + + if (dev) { + if ((status = bce_vhci_cmd_device_create(&vhci->cq, index, &devid))) + return status; + vhci->devices[devid] = dev; + vhci->port_to_device[index] = devid; + + for (i = 0; i < 32; i++) { + if (dev->tq_mask & BIT(i)) { + dir = usb_endpoint_dir_in(&dev->tq[i].endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + if (i == 0) + dir = DMA_BIDIRECTIONAL; + bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir); + bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc); + } + } + } + + return status; +} + +static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +{ + return 0; +} + +static int bce_vhci_get_frame_number(struct usb_hcd *hcd) +{ + return 0; +} + +static int bce_vhci_bus_suspend(struct usb_hcd *hcd) +{ + int i, j; + int status; + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + pr_info("bce_vhci: suspend started\n"); + + pr_info("bce_vhci: suspend endpoints\n"); + for (i = 0; i < 16; i++) { + if (!vhci->port_to_device[i]) + continue; + for (j = 0; j < 32; j++) { + if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) + continue; + bce_vhci_transfer_queue_pause(&vhci->devices[vhci->port_to_device[i]]->tq[j], + BCE_VHCI_PAUSE_SUSPEND); + } + } + + pr_info("bce_vhci: suspend ports\n"); + for (i = 0; i < 16; i++) { + if (!vhci->port_to_device[i]) + continue; + bce_vhci_cmd_port_suspend(&vhci->cq, i); + } + pr_info("bce_vhci: suspend controller\n"); + if ((status = bce_vhci_cmd_controller_pause(&vhci->cq))) + return status; + + bce_vhci_event_queue_pause(&vhci->ev_commands); + bce_vhci_event_queue_pause(&vhci->ev_system); + bce_vhci_event_queue_pause(&vhci->ev_isochronous); + bce_vhci_event_queue_pause(&vhci->ev_interrupt); + bce_vhci_event_queue_pause(&vhci->ev_asynchronous); + pr_info("bce_vhci: suspend done\n"); + return 0; +} + +static int bce_vhci_bus_resume(struct usb_hcd *hcd) +{ + int i, j; + int status; + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + pr_info("bce_vhci: resume started\n"); + + bce_vhci_event_queue_resume(&vhci->ev_system); + bce_vhci_event_queue_resume(&vhci->ev_isochronous); + bce_vhci_event_queue_resume(&vhci->ev_interrupt); + bce_vhci_event_queue_resume(&vhci->ev_asynchronous); + bce_vhci_event_queue_resume(&vhci->ev_commands); + + pr_info("bce_vhci: resume controller\n"); + if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) + return status; + + pr_info("bce_vhci: resume ports\n"); + for (i = 0; i < 16; i++) { + if (!vhci->port_to_device[i]) + continue; + bce_vhci_cmd_port_resume(&vhci->cq, i); + } + pr_info("bce_vhci: resume endpoints\n"); + for (i = 0; i < 16; i++) { + if 
(!vhci->port_to_device[i]) + continue; + for (j = 0; j < 32; j++) { + if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) + continue; + bce_vhci_transfer_queue_resume(&vhci->devices[vhci->port_to_device[i]]->tq[j], + BCE_VHCI_PAUSE_SUSPEND); + } + } + + pr_info("bce_vhci: resume done\n"); + return 0; +} + +static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) +{ + struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; + if (!q) + return -ENOENT; + pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress); + return bce_vhci_urb_create(q, urb); +} + +static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) +{ + struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; + pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress); + if (!q) + return -ENOENT; + return bce_vhci_urb_request_cancel(q, urb, status); +} + +static void bce_vhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep) +{ + struct bce_vhci_transfer_queue *q = ep->hcpriv; + pr_debug("bce_vhci_endpoint_reset\n"); + if (q) + bce_vhci_transfer_queue_request_reset(q); +} + +static u8 bce_vhci_endpoint_index(u8 addr) +{ + if (addr & 0x80) + return (u8) (0x10 + (addr & 0xf)); + return (u8) (addr & 0xf); +} + +static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp) +{ + u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress); + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; + struct bce_vhci_device *vdev = vhci->devices[devid]; + pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, endp_index); + + if (udev->bus->root_hub == udev) /* The USB hub */ + return 0; + if (vdev == NULL) + return -ENODEV; + if (vdev->tq_mask & BIT(endp_index)) { + endp->hcpriv = &vdev->tq[endp_index]; + return 0; + } + + bce_vhci_create_transfer_queue(vhci, &vdev->tq[endp_index], endp, devid, + usb_endpoint_dir_in(&endp->desc) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); + endp->hcpriv = &vdev->tq[endp_index]; + vdev->tq_mask |= BIT(endp_index); + + bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &endp->desc); + return 0; +} + +static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp) +{ + u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress); + struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; + struct bce_vhci_transfer_queue *q = endp->hcpriv; + struct bce_vhci_device *vdev = vhci->devices[devid]; + pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index); + if (!q) { + if (vdev && vdev->tq_mask & BIT(endp_index)) { + pr_err("something deleted the hcpriv?\n"); + q = &vdev->tq[endp_index]; + } else { + return 0; + } + } + + bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu)); + vhci->devices[devid]->tq_mask &= ~BIT(endp_index); + bce_vhci_destroy_transfer_queue(vhci, q); + return 0; +} + +static int bce_vhci_create_message_queues(struct bce_vhci *vhci) +{ + if (bce_vhci_message_queue_create(vhci, &vhci->msg_commands, "VHC1HostCommands") || + bce_vhci_message_queue_create(vhci, &vhci->msg_system, "VHC1HostSystemEvents") || + bce_vhci_message_queue_create(vhci, &vhci->msg_isochronous, "VHC1HostIsochronousEvents") || + bce_vhci_message_queue_create(vhci, &vhci->msg_interrupt, "VHC1HostInterruptEvents") || + bce_vhci_message_queue_create(vhci, &vhci->msg_asynchronous, "VHC1HostAsynchronousEvents")) { + bce_vhci_destroy_message_queues(vhci); + return -EINVAL; + } + spin_lock_init(&vhci->msg_asynchronous_lock); + bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands); + return 0; +} + +static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci) +{ + bce_vhci_command_queue_destroy(&vhci->cq); + bce_vhci_message_queue_destroy(vhci, &vhci->msg_commands); + bce_vhci_message_queue_destroy(vhci, &vhci->msg_system); + bce_vhci_message_queue_destroy(vhci, &vhci->msg_isochronous); + bce_vhci_message_queue_destroy(vhci, &vhci->msg_interrupt); + bce_vhci_message_queue_destroy(vhci, &vhci->msg_asynchronous); +} + +static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); +static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); + +static int bce_vhci_create_event_queues(struct bce_vhci *vhci) +{ + vhci->ev_cq = bce_create_cq(vhci->dev, 0x100); + if (!vhci->ev_cq) + return -EINVAL; +#define CREATE_EVENT_QUEUE(field, name, cb) bce_vhci_event_queue_create(vhci, &vhci->field, name, cb) + if (__bce_vhci_event_queue_create(vhci, &vhci->ev_commands, "VHC1FirmwareCommands", + bce_vhci_firmware_event_completion) || + CREATE_EVENT_QUEUE(ev_system, "VHC1FirmwareSystemEvents", bce_vhci_handle_system_event) || + CREATE_EVENT_QUEUE(ev_isochronous, "VHC1FirmwareIsochronousEvents", bce_vhci_handle_usb_event) || + CREATE_EVENT_QUEUE(ev_interrupt, "VHC1FirmwareInterruptEvents", bce_vhci_handle_usb_event) || + CREATE_EVENT_QUEUE(ev_asynchronous, "VHC1FirmwareAsynchronousEvents", bce_vhci_handle_usb_event)) { + bce_vhci_destroy_event_queues(vhci); + return -EINVAL; + } +#undef CREATE_EVENT_QUEUE + return 0; +} + +static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci) +{ + bce_vhci_event_queue_destroy(vhci, &vhci->ev_commands); + bce_vhci_event_queue_destroy(vhci, &vhci->ev_system); + bce_vhci_event_queue_destroy(vhci, &vhci->ev_isochronous); + 
bce_vhci_event_queue_destroy(vhci, &vhci->ev_interrupt); + bce_vhci_event_queue_destroy(vhci, &vhci->ev_asynchronous); + if (vhci->ev_cq) + bce_destroy_cq(vhci->dev, vhci->ev_cq); +} + +static void bce_vhci_send_fw_event_response(struct bce_vhci *vhci, struct bce_vhci_message *req, u16 status) +{ + unsigned long timeout = 1000; + struct bce_vhci_message r = *req; + r.cmd = (u16) (req->cmd | 0x8000u); + r.status = status; + r.param1 = req->param1; + r.param2 = 0; + + if (bce_reserve_submission(vhci->msg_system.sq, &timeout)) { + pr_err("bce-vhci: Cannot reserve submission for FW event reply\n"); + return; + } + bce_vhci_message_queue_write(&vhci->msg_system, &r); +} + +static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci_message *msg) +{ + unsigned long flags; + bce_vhci_device_t devid; + u8 endp; + struct bce_vhci_device *dev; + struct bce_vhci_transfer_queue *tq; + if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) { + devid = (bce_vhci_device_t) (msg->param1 & 0xff); + endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); + dev = vhci->devices[devid]; + if (!dev || !(dev->tq_mask & BIT(endp))) + return BCE_VHCI_BAD_ARGUMENT; + tq = &dev->tq[endp]; + } + + if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE) { + if (msg->param2 == BCE_VHCI_ENDPOINT_ACTIVE) { + bce_vhci_transfer_queue_resume(tq, BCE_VHCI_PAUSE_FIRMWARE); + return BCE_VHCI_SUCCESS; + } else if (msg->param2 == BCE_VHCI_ENDPOINT_PAUSED) { + bce_vhci_transfer_queue_pause(tq, BCE_VHCI_PAUSE_FIRMWARE); + return BCE_VHCI_SUCCESS; + } + return BCE_VHCI_BAD_ARGUMENT; + } else if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) { + if (msg->param2 == BCE_VHCI_ENDPOINT_STALLED) { + tq->state = msg->param2; + spin_lock_irqsave(&tq->urb_lock, flags); + tq->stalled = true; + spin_unlock_irqrestore(&tq->urb_lock, flags); + return BCE_VHCI_SUCCESS; + } + return BCE_VHCI_BAD_ARGUMENT; + } + pr_warn("bce-vhci: Unhandled firmware event: %x s=%x p1=%x p2=%llx\n", + msg->cmd, msg->status, msg->param1, msg->param2); + return BCE_VHCI_BAD_ARGUMENT; +} + +static void bce_vhci_handle_firmware_events_w(struct work_struct *ws) +{ + size_t cnt = 0; + int result; + struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_fw_events); + struct bce_queue_sq *sq = vhci->ev_commands.sq; + struct bce_sq_completion_data *cq; + struct bce_vhci_message *msg, *msg2 = NULL; + + while (true) { + if (msg2) { + msg = msg2; + msg2 = NULL; + } else if ((cq = bce_next_completion(sq))) { + if (cq->status == BCE_COMPLETION_ABORTED) { + bce_notify_submission_complete(sq); + continue; + } + msg = &vhci->ev_commands.data[sq->head]; + } else { + break; + } + + pr_debug("bce-vhci: Got fw event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2); + if ((cq = bce_next_completion(sq))) { + msg2 = &vhci->ev_commands.data[(sq->head + 1) % sq->el_count]; + pr_debug("bce-vhci: Got second fw event: %x s=%x p1=%x p2=%llx\n", + msg2->cmd, msg2->status, msg2->param1, msg2->param2); + if (cq->status != BCE_COMPLETION_ABORTED && + msg2->cmd == (msg->cmd | 0x4000) && msg2->param1 == msg->param1) { + /* Take two elements */ + pr_debug("bce-vhci: Cancelled\n"); + bce_vhci_send_fw_event_response(vhci, msg, BCE_VHCI_ABORT); + + bce_notify_submission_complete(sq); + bce_notify_submission_complete(sq); + msg2 = NULL; + cnt += 2; + continue; + } + + pr_warn("bce-vhci: Handle fw event - unexpected cancellation\n"); + } + + result = bce_vhci_handle_firmware_event(vhci, msg); + 
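/* Report the handler's status back to the firmware. */ +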
bce_vhci_send_fw_event_response(vhci, msg, (u16) result); + + + bce_notify_submission_complete(sq); + ++cnt; + } + bce_vhci_event_queue_submit_pending(&vhci->ev_commands, cnt); + if (atomic_read(&sq->available_commands) == sq->el_count - 1) { + pr_debug("bce-vhci: complete\n"); + complete(&vhci->ev_commands.queue_empty_completion); + } +} + +static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq) +{ + struct bce_vhci_event_queue *q = sq->userdata; + queue_work(q->vhci->tq_state_wq, &q->vhci->w_fw_events); +} + +static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) +{ + if (msg->cmd & 0x8000) { + bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); + } else { + pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n", + msg->cmd, msg->status, msg->param1, msg->param2); + } +} + +static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) +{ + bce_vhci_device_t devid; + u8 endp; + struct bce_vhci_device *dev; + if (msg->cmd & 0x8000) { + bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); + } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { + devid = (bce_vhci_device_t) (msg->param1 & 0xff); + endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); + dev = q->vhci->devices[devid]; + if (!dev || (dev->tq_mask & BIT(endp)) == 0) { + pr_err("bce-vhci: Didn't find destination for transfer queue event\n"); + return; + } + bce_vhci_transfer_queue_event(&dev->tq[endp], msg); + } else { + pr_warn("bce-vhci: Unhandled USB event: %x s=%x p1=%x p2=%llx\n", + msg->cmd, msg->status, msg->param1, msg->param2); + } +} + + + +static const struct hc_driver bce_vhci_driver = { + .description = "bce-vhci", + .product_desc = "BCE VHCI Host Controller", + .hcd_priv_size = sizeof(struct bce_vhci *), + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0) + .flags = HCD_USB2, +#else + .flags = HCD_USB2 | HCD_DMA, +#endif + + .start = bce_vhci_start, + .stop = bce_vhci_stop, + .hub_status_data = bce_vhci_hub_status_data, + .hub_control = bce_vhci_hub_control, + .urb_enqueue = bce_vhci_urb_enqueue, + .urb_dequeue = bce_vhci_urb_dequeue, + .enable_device = bce_vhci_enable_device, + .free_dev = bce_vhci_free_device, + .address_device = bce_vhci_address_device, + .add_endpoint = bce_vhci_add_endpoint, + .drop_endpoint = bce_vhci_drop_endpoint, + .endpoint_reset = bce_vhci_endpoint_reset, + .check_bandwidth = bce_vhci_check_bandwidth, + .get_frame_number = bce_vhci_get_frame_number, + .bus_suspend = bce_vhci_bus_suspend, + .bus_resume = bce_vhci_bus_resume +}; + + +int __init bce_vhci_module_init(void) +{ + int result; + if ((result = alloc_chrdev_region(&bce_vhci_chrdev, 0, 1, "bce-vhci"))) + goto fail; +#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) + bce_vhci_class = class_create(THIS_MODULE, "bce-vhci"); +#else + bce_vhci_class = class_create("bce-vhci"); +#endif + if (IS_ERR(bce_vhci_class)) { + result = PTR_ERR(bce_vhci_class); + goto fail_chrdev; + } + return 0; + +fail_chrdev: + unregister_chrdev_region(bce_vhci_chrdev, 1); +fail: + if (!result) + result = -EINVAL; + return result; +} +void __exit bce_vhci_module_exit(void) +{ + class_destroy(bce_vhci_class); + unregister_chrdev_region(bce_vhci_chrdev, 1); +} + +module_param_named(vhci_port_mask, bce_vhci_port_mask, ushort, 0444); +MODULE_PARM_DESC(vhci_port_mask, "Specifies which VHCI ports are enabled"); diff --git 
a/drivers/staging/apple-bce/vhci/vhci.h b/drivers/staging/apple-bce/vhci/vhci.h new file mode 100644 index 000000000000..6c2e22622f4c --- /dev/null +++ b/drivers/staging/apple-bce/vhci/vhci.h @@ -0,0 +1,52 @@ +#ifndef BCE_VHCI_H +#define BCE_VHCI_H + +#include "queue.h" +#include "transfer.h" + +struct usb_hcd; +struct bce_queue_cq; + +struct bce_vhci_device { + struct bce_vhci_transfer_queue tq[32]; + u32 tq_mask; +}; +struct bce_vhci { + struct apple_bce_device *dev; + dev_t vdevt; + struct device *vdev; + struct usb_hcd *hcd; + struct spinlock hcd_spinlock; + struct bce_vhci_message_queue msg_commands; + struct bce_vhci_message_queue msg_system; + struct bce_vhci_message_queue msg_isochronous; + struct bce_vhci_message_queue msg_interrupt; + struct bce_vhci_message_queue msg_asynchronous; + struct spinlock msg_asynchronous_lock; + struct bce_vhci_command_queue cq; + struct bce_queue_cq *ev_cq; + struct bce_vhci_event_queue ev_commands; + struct bce_vhci_event_queue ev_system; + struct bce_vhci_event_queue ev_isochronous; + struct bce_vhci_event_queue ev_interrupt; + struct bce_vhci_event_queue ev_asynchronous; + u16 port_mask; + u8 port_count; + u16 port_power_mask; + bce_vhci_device_t port_to_device[16]; + struct bce_vhci_device *devices[16]; + struct workqueue_struct *tq_state_wq; + struct work_struct w_fw_events; +}; + +int __init bce_vhci_module_init(void); +void __exit bce_vhci_module_exit(void); + +int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci); +void bce_vhci_destroy(struct bce_vhci *vhci); +int bce_vhci_start(struct usb_hcd *hcd); +void bce_vhci_stop(struct usb_hcd *hcd); + +struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd); + +#endif //BCE_VHCI_H diff --git a/include/linux/hid.h b/include/linux/hid.h index dce862cafbbd..5952d95a53e8 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -625,7 +625,9 @@ struct hid_input { enum hid_type { HID_TYPE_OTHER = 0, HID_TYPE_USBMOUSE, - HID_TYPE_USBNONE + HID_TYPE_USBNONE, + HID_TYPE_SPI_KEYBOARD, + HID_TYPE_SPI_MOUSE, }; enum hid_battery_status { @@ -786,6 +788,8 @@ struct hid_descriptor { .bus = BUS_BLUETOOTH, .vendor = (ven), .product = (prod) #define HID_I2C_DEVICE(ven, prod) \ .bus = BUS_I2C, .vendor = (ven), .product = (prod) +#define HID_SPI_DEVICE(ven, prod) \ + .bus = BUS_SPI, .vendor = (ven), .product = (prod) #define HID_REPORT_ID(rep) \ .report_type = (rep) diff --git a/include/linux/soc/apple/dockchannel.h b/include/linux/soc/apple/dockchannel.h new file mode 100644 index 000000000000..0b7093935ddf --- /dev/null +++ b/include/linux/soc/apple/dockchannel.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Apple Dockchannel devices + * Copyright (C) The Asahi Linux Contributors + */ +#ifndef _LINUX_APPLE_DOCKCHANNEL_H_ +#define _LINUX_APPLE_DOCKCHANNEL_H_ + +#include +#include +#include + +#if IS_ENABLED(CONFIG_APPLE_DOCKCHANNEL) + +struct dockchannel; + +struct dockchannel *dockchannel_init(struct platform_device *pdev); + +int dockchannel_send(struct dockchannel *dockchannel, const void *buf, size_t count); +int dockchannel_recv(struct dockchannel *dockchannel, void *buf, size_t count); +int dockchannel_await(struct dockchannel *dockchannel, + void (*callback)(void *cookie, size_t avail), + void *cookie, size_t count); + +#endif +#endif -- 2.52.0 From b20faac74bcfcb967681b0d60487c105ac08daa3 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:32 +0800 Subject: [PATCH 8/9] vesa-dsc-bpp Signed-off-by: Eric Naim --- 
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++ drivers/gpu/drm/drm_displayid_internal.h | 11 ++ drivers/gpu/drm/drm_edid.c | 102 +++++++++++------- include/drm/drm_connector.h | 6 ++ include/drm/drm_modes.h | 10 ++ 5 files changed, 109 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a0fad8c73db7..6c26d6067ee6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6779,6 +6779,11 @@ static void fill_stream_properties_from_drm_display_mode( stream->output_color_space = get_output_color_space(timing_out, connector_state); stream->content_type = get_output_content_type(connector_state); + + /* DisplayID Type VII pass-through timings. */ + if (mode_in->dsc_passthrough_timings_support && info->dp_dsc_bpp_x16 != 0) { + stream->timing.dsc_fixed_bits_per_pixel_x16 = info->dp_dsc_bpp_x16; + } } static void fill_audio_info(struct audio_info *audio_info, @@ -7237,6 +7242,7 @@ create_stream_for_sink(struct drm_connector *connector, struct drm_display_mode mode; struct drm_display_mode saved_mode; struct drm_display_mode *freesync_mode = NULL; + struct drm_display_mode *dsc_passthru_mode = NULL; bool native_mode_found = false; bool recalculate_timing = false; bool scale = dm_state->scaling != RMX_OFF; @@ -7328,6 +7334,16 @@ create_stream_for_sink(struct drm_connector *connector, } } + list_for_each_entry(dsc_passthru_mode, &connector->modes, head) { + if (dsc_passthru_mode->hdisplay == mode.hdisplay && + dsc_passthru_mode->vdisplay == mode.vdisplay && + drm_mode_vrefresh(dsc_passthru_mode) == mode_refresh) { + mode.dsc_passthrough_timings_support = + dsc_passthru_mode->dsc_passthrough_timings_support; + break; + } + } + if (recalculate_timing) drm_mode_set_crtcinfo(&saved_mode, 0); diff --git a/drivers/gpu/drm/drm_displayid_internal.h b/drivers/gpu/drm/drm_displayid_internal.h index 5b1b32f73516..8f1a2f33ca1a 100644 --- a/drivers/gpu/drm/drm_displayid_internal.h +++ b/drivers/gpu/drm/drm_displayid_internal.h @@ -97,6 +97,7 @@ struct displayid_header { u8 ext_count; } __packed; +#define DISPLAYID_BLOCK_REV GENMASK(2, 0) struct displayid_block { u8 tag; u8 rev; @@ -125,6 +126,7 @@ struct displayid_detailed_timings_1 { __le16 vsw; } __packed; +#define DISPLAYID_BLOCK_PASSTHROUGH_TIMINGS_SUPPORT BIT(3) struct displayid_detailed_timing_block { struct displayid_block base; struct displayid_detailed_timings_1 timings[]; @@ -137,19 +139,28 @@ struct displayid_formula_timings_9 { u8 vrefresh; } __packed; +#define DISPLAYID_BLOCK_DESCRIPTOR_PAYLOAD_BYTES GENMASK(6, 4) struct displayid_formula_timing_block { struct displayid_block base; struct displayid_formula_timings_9 timings[]; } __packed; +#define DISPLAYID_VESA_DP_TYPE GENMASK(2, 0) #define DISPLAYID_VESA_MSO_OVERLAP GENMASK(3, 0) #define DISPLAYID_VESA_MSO_MODE GENMASK(6, 5) +#define DISPLAYID_VESA_DSC_BPP_INT GENMASK(5, 0) +#define DISPLAYID_VESA_DSC_BPP_FRACT GENMASK(3, 0) + +#define DISPLAYID_VESA_DP_TYPE_EDP 0 +#define DISPLAYID_VESA_DP_TYPE_DP 1 struct displayid_vesa_vendor_specific_block { struct displayid_block base; u8 oui[3]; u8 data_structure_type; u8 mso; + u8 dsc_bpp_int; + u8 dsc_bpp_fract; } __packed; /* diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 056eff8cbd1a..26d53a548a27 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include 
"drm_crtc_internal.h" @@ -6566,12 +6567,13 @@ static void drm_get_monitor_range(struct drm_connector *connector, info->monitor_range.min_vfreq, info->monitor_range.max_vfreq); } -static void drm_parse_vesa_mso_data(struct drm_connector *connector, - const struct displayid_block *block) +static void drm_parse_vesa_specific_block(struct drm_connector *connector, + const struct displayid_block *block) { struct displayid_vesa_vendor_specific_block *vesa = (struct displayid_vesa_vendor_specific_block *)block; struct drm_display_info *info = &connector->display_info; + int dp_type; if (block->num_bytes < 3) { drm_dbg_kms(connector->dev, @@ -6583,51 +6585,73 @@ static void drm_parse_vesa_mso_data(struct drm_connector *connector, if (oui(vesa->oui[0], vesa->oui[1], vesa->oui[2]) != VESA_IEEE_OUI) return; - if (sizeof(*vesa) != sizeof(*block) + block->num_bytes) { + if (block->num_bytes < 5) { drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] Unexpected VESA vendor block size\n", connector->base.id, connector->name); return; } - switch (FIELD_GET(DISPLAYID_VESA_MSO_MODE, vesa->mso)) { - default: - drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] Reserved MSO mode value\n", + dp_type = FIELD_GET(DISPLAYID_VESA_DP_TYPE, vesa->data_structure_type); + if (dp_type > 1) { + drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] Reserved dp type value\n", connector->base.id, connector->name); - fallthrough; - case 0: - info->mso_stream_count = 0; - break; - case 1: - info->mso_stream_count = 2; /* 2 or 4 links */ - break; - case 2: - info->mso_stream_count = 4; /* 4 links */ - break; } - if (!info->mso_stream_count) { + /* MSO is only supported for eDP */ + if (dp_type == DISPLAYID_VESA_DP_TYPE_EDP) { + switch (FIELD_GET(DISPLAYID_VESA_MSO_MODE, vesa->mso)) { + default: + drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] Reserved MSO mode value\n", + connector->base.id, connector->name); + fallthrough; + case 0: + info->mso_stream_count = 0; + break; + case 1: + info->mso_stream_count = 2; /* 2 or 4 links */ + break; + case 2: + info->mso_stream_count = 4; /* 4 links */ + break; + } + } + + if (info->mso_stream_count) { + info->mso_pixel_overlap = FIELD_GET(DISPLAYID_VESA_MSO_OVERLAP, vesa->mso); + if (info->mso_pixel_overlap > 8) { + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Reserved MSO pixel overlap value %u\n", + connector->base.id, connector->name, + info->mso_pixel_overlap); + info->mso_pixel_overlap = 8; + } + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] MSO stream count %u, pixel overlap %u\n", + connector->base.id, connector->name, + info->mso_stream_count, info->mso_pixel_overlap); + } else { info->mso_pixel_overlap = 0; + } + + if (block->num_bytes < 7) { + /* DSC bpp is optional */ return; } - info->mso_pixel_overlap = FIELD_GET(DISPLAYID_VESA_MSO_OVERLAP, vesa->mso); - if (info->mso_pixel_overlap > 8) { + info->dp_dsc_bpp_x16 = FIELD_GET(DISPLAYID_VESA_DSC_BPP_INT, vesa->dsc_bpp_int) << 4 | + FIELD_GET(DISPLAYID_VESA_DSC_BPP_FRACT, vesa->dsc_bpp_fract); + + if (info->dp_dsc_bpp_x16 > 0) { drm_dbg_kms(connector->dev, - "[CONNECTOR:%d:%s] Reserved MSO pixel overlap value %u\n", + "[CONNECTOR:%d:%s] DSC bits per pixel " FXP_Q4_FMT "\n", connector->base.id, connector->name, - info->mso_pixel_overlap); - info->mso_pixel_overlap = 8; + FXP_Q4_ARGS(info->dp_dsc_bpp_x16)); } - - drm_dbg_kms(connector->dev, - "[CONNECTOR:%d:%s] MSO stream count %u, pixel overlap %u\n", - connector->base.id, connector->name, - info->mso_stream_count, info->mso_pixel_overlap); } -static void drm_update_mso(struct 
drm_connector *connector, - const struct drm_edid *drm_edid) +static void drm_update_vesa_specific_block(struct drm_connector *connector, + const struct drm_edid *drm_edid) { const struct displayid_block *block; struct displayid_iter iter; @@ -6635,7 +6659,7 @@ static void drm_update_mso(struct drm_connector *connector, displayid_iter_edid_begin(drm_edid, &iter); displayid_iter_for_each(block, &iter) { if (block->tag == DATA_BLOCK_2_VENDOR_SPECIFIC) - drm_parse_vesa_mso_data(connector, block); + drm_parse_vesa_specific_block(connector, block); } displayid_iter_end(&iter); } @@ -6672,6 +6696,7 @@ static void drm_reset_display_info(struct drm_connector *connector) info->mso_stream_count = 0; info->mso_pixel_overlap = 0; info->max_dsc_bpp = 0; + info->dp_dsc_bpp_x16 = 0; kfree(info->vics); info->vics = NULL; @@ -6795,7 +6820,7 @@ static void update_display_info(struct drm_connector *connector, if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) info->color_formats |= DRM_COLOR_FORMAT_YCBCR422; - drm_update_mso(connector, drm_edid); + drm_update_vesa_specific_block(connector, drm_edid); out: if (drm_edid_has_internal_quirk(connector, EDID_QUIRK_NON_DESKTOP)) { @@ -6825,8 +6850,8 @@ static void update_display_info(struct drm_connector *connector, } static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *dev, - const struct displayid_detailed_timings_1 *timings, - bool type_7) + const struct displayid_block *block, + const struct displayid_detailed_timings_1 *timings) { struct drm_display_mode *mode; unsigned int pixel_clock = (timings->pixel_clock[0] | @@ -6842,11 +6867,16 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d unsigned int vsync_width = le16_to_cpu(timings->vsw) + 1; bool hsync_positive = le16_to_cpu(timings->hsync) & (1 << 15); bool vsync_positive = le16_to_cpu(timings->vsync) & (1 << 15); + bool type_7 = block->tag == DATA_BLOCK_2_TYPE_7_DETAILED_TIMING; mode = drm_mode_create(dev); if (!mode) return NULL; + if (type_7 && FIELD_GET(DISPLAYID_BLOCK_REV, block->rev) >= 1) + mode->dsc_passthrough_timings_support = + block->rev & DISPLAYID_BLOCK_PASSTHROUGH_TIMINGS_SUPPORT; + /* resolution is kHz for type VII, and 10 kHz for type I */ mode->clock = type_7 ? 
pixel_clock : pixel_clock * 10; mode->hdisplay = hactive; @@ -6879,7 +6909,6 @@ static int add_displayid_detailed_1_modes(struct drm_connector *connector, int num_timings; struct drm_display_mode *newmode; int num_modes = 0; - bool type_7 = block->tag == DATA_BLOCK_2_TYPE_7_DETAILED_TIMING; /* blocks must be multiple of 20 bytes length */ if (block->num_bytes % 20) return 0; @@ -6888,7 +6917,7 @@ static int add_displayid_detailed_1_modes(struct drm_connector *connector, for (i = 0; i < num_timings; i++) { struct displayid_detailed_timings_1 *timings = &det->timings[i]; - newmode = drm_mode_displayid_detailed(connector->dev, timings, type_7); + newmode = drm_mode_displayid_detailed(connector->dev, block, timings); if (!newmode) continue; @@ -6935,7 +6964,8 @@ static int add_displayid_formula_modes(struct drm_connector *connector, struct drm_display_mode *newmode; int num_modes = 0; bool type_10 = block->tag == DATA_BLOCK_2_TYPE_10_FORMULA_TIMING; - int timing_size = 6 + ((formula_block->base.rev & 0x70) >> 4); + int timing_size = 6 + + FIELD_GET(DISPLAYID_BLOCK_DESCRIPTOR_PAYLOAD_BYTES, formula_block->base.rev); /* extended blocks are not supported yet */ if (timing_size != 6) diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index dab9d5521f41..6fa7f2f67033 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -884,6 +884,12 @@ struct drm_display_info { */ u32 max_dsc_bpp; + /** + * @dp_dsc_bpp: DP Display-Stream-Compression (DSC) timing's target + * DSC bits per pixel in 6.4 fixed point format. 0 means undefined. + */ + u16 dp_dsc_bpp_x16; + /** * @vics: Array of vics_len VICs. Internal to EDID parsing. */ diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index b9bb92e4b029..312e5c03af9a 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -417,6 +417,16 @@ struct drm_display_mode { */ enum hdmi_picture_aspect picture_aspect_ratio; + /** + * @dsc_passthrough_timing_support: + * + * Indicates whether this mode timing descriptor is supported + * with specific target DSC bits per pixel only. + * + * VESA vendor-specific data block shall exist with the relevant + * DSC bits per pixel declaration when this flag is set to true. + */ + bool dsc_passthrough_timings_support; }; /** -- 2.52.0 From a164c0f6f4e59eec8c81740449189b1e729ec506 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 26 Jan 2026 17:33:33 +0800 Subject: [PATCH 9/9] vmscape Signed-off-by: Eric Naim --- Documentation/admin-guide/hw-vuln/vmscape.rst | 8 +++ .../admin-guide/kernel-parameters.txt | 4 +- arch/x86/Kconfig | 1 + arch/x86/entry/entry_64.S | 13 +++- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/entry-common.h | 9 ++- arch/x86/include/asm/nospec-branch.h | 11 +++- arch/x86/kernel/cpu/bugs.c | 65 +++++++++++++++---- arch/x86/kvm/x86.c | 4 +- arch/x86/net/bpf_jit_comp.c | 2 + 10 files changed, 90 insertions(+), 29 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst index d9b9a2b6c114..580f288ae8bf 100644 --- a/Documentation/admin-guide/hw-vuln/vmscape.rst +++ b/Documentation/admin-guide/hw-vuln/vmscape.rst @@ -86,6 +86,10 @@ The possible values in this file are: run a potentially malicious guest and issues an IBPB before the first exit to userspace after VM-exit. + * 'Mitigation: Clear BHB before exit to userspace': + + As above, conditional BHB clearing mitigation is enabled. + * 'Mitigation: IBPB on VMEXIT': IBPB is issued on every VM-exit. 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e70e737f6daa..43dff2e3cae4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -8276,9 +8276,11 @@ Kernel parameters
 			off	- disable the mitigation
 			ibpb	- use Indirect Branch Prediction Barrier
-				  (IBPB) mitigation (default)
+				  (IBPB) mitigation
 			force	- force vulnerability detection even on
 				  unaffected processors
+			on	- (default) selects IBPB or BHB clear
+				  mitigation based on the CPU
 
 	vsyscall=	[X86-64,EARLY]
 			Controls the behavior of vsyscalls (i.e. calls to
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80527299f859..e03e35a2a6ce 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2710,6 +2710,7 @@ config MITIGATION_TSA
 config MITIGATION_VMSCAPE
 	bool "Mitigate VMSCAPE"
 	depends on KVM
+	depends on HAVE_STATIC_CALL
 	default y
 	help
 	  Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f9983a1907bf..6d93602dd309 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1529,12 +1529,19 @@ SYM_CODE_END(rewind_stack_and_make_dead)
  * refactored in the future if needed. The .skips are for safety, to ensure
  * that all RETs are in the second half of a cacheline to mitigate Indirect
  * Target Selection, rather than taking the slowpath via its_return_thunk.
+ *
+ * Note, callers should use a speculation barrier like LFENCE immediately after
+ * a call to this function to ensure BHB is cleared before indirect branches.
  */
 SYM_FUNC_START(clear_bhb_loop)
 	ANNOTATE_NOENDBR
 	push	%rbp
 	mov	%rsp, %rbp
-	movl	$5, %ecx
+
+	/* Loop count differs based on BHI_CTRL; see Intel's BHI guidance. */
+	ALTERNATIVE "movl $5, %ecx; movl $5, %edx", \
+		    "movl $12, %ecx; movl $7, %edx", X86_FEATURE_BHI_CTRL
+
 	ANNOTATE_INTRA_FUNCTION_CALL
 	call	1f
 	jmp	5f
@@ -1555,7 +1562,7 @@ SYM_FUNC_START(clear_bhb_loop)
 	 * but some Clang versions (e.g. 18) don't like this.
 	 */
 	.skip 32 - 18, 0xcc
-2:	movl	$5, %eax
+2:	movl	%edx, %eax
 3:	jmp	4f
 	nop
 4:	sub	$1, %eax
 	jnz	3b
 	sub	$1, %ecx
 	jnz	1b
 .Lret2:	RET
-5:	lfence
+5:
 	pop	%rbp
 	RET
 SYM_FUNC_END(clear_bhb_loop)
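The ALTERNATIVE above only swaps loop bounds: 5 outer and 5 inner iterations by
default, 12 and 7 when X86_FEATURE_BHI_CTRL is present. A plain C model of the
loop nest, purely to show how many taken branches each variant executes (the
real sequence works by executing those branches, not by the arithmetic):

#include <stdio.h>

/* Mirrors the %ecx (outer) / %edx (inner) loop nest in clear_bhb_loop. */
static unsigned int bhb_taken_branches(unsigned int outer, unsigned int inner)
{
	unsigned int taken = 0;

	for (unsigned int i = 0; i < outer; i++)
		for (unsigned int j = 0; j < inner; j++)
			taken++;	/* one taken branch per inner iteration */

	return taken;
}

int main(void)
{
	printf("default:  %u\n", bhb_taken_branches(5, 5));	/* legacy parts */
	printf("BHI_CTRL: %u\n", bhb_taken_branches(12, 7));	/* ADL and newer */
	return 0;
}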
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c3b53beb1300..aa39430476d6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -501,7 +501,7 @@
 #define X86_FEATURE_TSA_SQ_NO		(21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
 #define X86_FEATURE_TSA_L1_NO		(21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
 #define X86_FEATURE_CLEAR_CPU_BUF_VM	(21*32+13) /* Clear CPU buffers using VERW before VMRUN */
-#define X86_FEATURE_IBPB_EXIT_TO_USER	(21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+/* Free					(21*32+14) */
 #define X86_FEATURE_ABMC		(21*32+15) /* Assignable Bandwidth Monitoring Counters */
 #define X86_FEATURE_MSR_IMM		(21*32+16) /* MSR immediate form instructions */
 #define X86_FEATURE_SGX_EUPDATESVN	(21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce3eb6d5fdf9..783e7cb50cae 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -4,6 +4,7 @@
 
 #include <linux/randomize_kstack.h>
 #include <linux/user-return-notifier.h>
+#include <linux/static_call.h>
 
 #include <asm/nospec-branch.h>
 #include <asm/io_bitmap.h>
@@ -94,11 +95,9 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 	 */
 	choose_random_kstack_offset(rdtsc());
 
-	/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
-	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
-	    this_cpu_read(x86_ibpb_exit_to_user)) {
-		indirect_branch_prediction_barrier();
-		this_cpu_write(x86_ibpb_exit_to_user, false);
+	if (unlikely(this_cpu_read(x86_predictor_flush_exit_to_user))) {
+		static_call_cond(vmscape_predictor_flush)();
+		this_cpu_write(x86_predictor_flush_exit_to_user, false);
 	}
 }
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4f4b5e8a1574..80efdb6645ba 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -331,11 +331,11 @@
 #ifdef CONFIG_X86_64
 .macro CLEAR_BRANCH_HISTORY
-	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+	ALTERNATIVE "", "call clear_bhb_loop; lfence", X86_FEATURE_CLEAR_BHB_LOOP
 .endm
 
 .macro CLEAR_BRANCH_HISTORY_VMEXIT
-	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT
+	ALTERNATIVE "", "call clear_bhb_loop; lfence", X86_FEATURE_CLEAR_BHB_VMEXIT
 .endm
 #else
 #define CLEAR_BRANCH_HISTORY
@@ -390,6 +390,8 @@ extern void write_ibpb(void);
 
 #ifdef CONFIG_X86_64
 extern void clear_bhb_loop(void);
+#else
+static inline void clear_bhb_loop(void) {}
 #endif
 
 extern void (*x86_return_thunk)(void);
@@ -533,7 +535,7 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 		: "memory");
 }
 
-DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
+DECLARE_PER_CPU(bool, x86_predictor_flush_exit_to_user);
 
 static inline void indirect_branch_prediction_barrier(void)
 {
@@ -542,6 +544,9 @@ static inline void indirect_branch_prediction_barrier(void)
 		:: "rax", "rcx", "rdx", "memory");
 }
 
+#include <linux/static_call_types.h>
+DECLARE_STATIC_CALL(vmscape_predictor_flush, write_ibpb);
+
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
 DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
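DECLARE_STATIC_CALL() above pairs with the DEFINE_STATIC_CALL_NULL() and
static_call_update() uses in bugs.c below. A condensed kernel-context sketch of
that pattern (demo_flush and both callbacks are invented for illustration, not
part of the patch): a NULL static call compiles to a patchable direct call
site, so the hot path never takes an indirect branch, and static_call_cond()
is a no-op until a target is installed.

#include <linux/static_call.h>
#include <linux/types.h>

static void flush_ibpb(void) { /* ... */ }
static void flush_bhb(void)  { /* ... */ }

/* Typed from flush_ibpb's prototype, but initially patched to do nothing. */
DEFINE_STATIC_CALL_NULL(demo_flush, flush_ibpb);

static void demo_pick_mitigation(bool use_bhb)
{
	/* One-time boot decision: rewrite the call site to a direct call. */
	static_call_update(demo_flush, use_bhb ? flush_bhb : flush_ibpb);
}

static void demo_hot_path(void)
{
	/* Direct call once installed; skipped entirely while still NULL. */
	static_call_cond(demo_flush)();
}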
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0a2847a4bb0..2818bfcb9f9f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -62,12 +62,11 @@ DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
 EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
 
 /*
- * Set when the CPU has run a potentially malicious guest. An IBPB will
- * be needed to before running userspace. That IBPB will flush the branch
- * predictor content.
+ * Set when the CPU has run a potentially malicious guest. Indicates that a
+ * branch predictor flush is needed before running userspace.
 */
-DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user);
-EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user);
+DEFINE_PER_CPU(bool, x86_predictor_flush_exit_to_user);
+EXPORT_PER_CPU_SYMBOL_GPL(x86_predictor_flush_exit_to_user);
 
 u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
@@ -230,6 +229,9 @@ static void x86_amd_ssb_disable(void)
 	wrmsrq(MSR_AMD64_LS_CFG, msrval);
 }
 
+DEFINE_STATIC_CALL_NULL(vmscape_predictor_flush, write_ibpb);
+EXPORT_STATIC_CALL_GPL(vmscape_predictor_flush);
+
 #undef pr_fmt
 #define pr_fmt(fmt)	"MDS: " fmt
@@ -3049,15 +3051,19 @@ static void __init srso_apply_mitigation(void)
 enum vmscape_mitigations {
 	VMSCAPE_MITIGATION_NONE,
 	VMSCAPE_MITIGATION_AUTO,
+	VMSCAPE_MITIGATION_ON,
 	VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER,
 	VMSCAPE_MITIGATION_IBPB_ON_VMEXIT,
+	VMSCAPE_MITIGATION_BHB_CLEAR_EXIT_TO_USER,
 };
 
 static const char * const vmscape_strings[] = {
-	[VMSCAPE_MITIGATION_NONE]		= "Vulnerable",
+	[VMSCAPE_MITIGATION_NONE]			= "Vulnerable",
 	/* [VMSCAPE_MITIGATION_AUTO] */
-	[VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER]	= "Mitigation: IBPB before exit to userspace",
-	[VMSCAPE_MITIGATION_IBPB_ON_VMEXIT]	= "Mitigation: IBPB on VMEXIT",
+	/* [VMSCAPE_MITIGATION_ON] */
+	[VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER]		= "Mitigation: IBPB before exit to userspace",
+	[VMSCAPE_MITIGATION_IBPB_ON_VMEXIT]		= "Mitigation: IBPB on VMEXIT",
+	[VMSCAPE_MITIGATION_BHB_CLEAR_EXIT_TO_USER]	= "Mitigation: Clear BHB before exit to userspace",
 };
 
 static enum vmscape_mitigations vmscape_mitigation __ro_after_init =
@@ -3074,7 +3080,9 @@ static int __init vmscape_parse_cmdline(char *str)
 		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
 	} else if (!strcmp(str, "force")) {
 		setup_force_cpu_bug(X86_BUG_VMSCAPE);
-		vmscape_mitigation = VMSCAPE_MITIGATION_AUTO;
+		vmscape_mitigation = VMSCAPE_MITIGATION_ON;
+	} else if (!strcmp(str, "on")) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_ON;
 	} else {
 		pr_err("Ignoring unknown vmscape=%s option.\n", str);
 	}
@@ -3085,17 +3093,42 @@ early_param("vmscape", vmscape_parse_cmdline);
 
 static void __init vmscape_select_mitigation(void)
 {
-	if (!boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
-	    !boot_cpu_has(X86_FEATURE_IBPB)) {
+	if (!boot_cpu_has_bug(X86_BUG_VMSCAPE)) {
 		vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
 		return;
 	}
 
-	if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) {
-		if (should_mitigate_vuln(X86_BUG_VMSCAPE))
+	if ((vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) &&
+	    !should_mitigate_vuln(X86_BUG_VMSCAPE))
+		vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+
+	switch (vmscape_mitigation) {
+	case VMSCAPE_MITIGATION_NONE:
+		break;
+
+	case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER:
+		if (!boot_cpu_has(X86_FEATURE_IBPB))
+			vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+		break;
+
+	case VMSCAPE_MITIGATION_AUTO:
+	case VMSCAPE_MITIGATION_ON:
+		/*
+		 * CPUs with BHI_CTRL (ADL and newer) can avoid the IBPB and use
+		 * the BHB clear sequence. These CPUs are only vulnerable to the
+		 * BHI variant of the VMSCAPE attack and do not require an IBPB
+		 * flush. The BHB clear sequence is not supported in 32-bit mode.
+		 */
+		if (boot_cpu_has(X86_FEATURE_BHI_CTRL) && IS_ENABLED(CONFIG_X86_64))
+			vmscape_mitigation = VMSCAPE_MITIGATION_BHB_CLEAR_EXIT_TO_USER;
+		else if (boot_cpu_has(X86_FEATURE_IBPB))
 			vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
 		else
 			vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+		break;
+
+	default:
+		break;
 	}
 }
@@ -3114,7 +3147,9 @@ static void __init vmscape_update_mitigation(void)
 
 static void __init vmscape_apply_mitigation(void)
 {
 	if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER)
-		setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER);
+		static_call_update(vmscape_predictor_flush, write_ibpb);
+	else if (vmscape_mitigation == VMSCAPE_MITIGATION_BHB_CLEAR_EXIT_TO_USER)
+		static_call_update(vmscape_predictor_flush, clear_bhb_loop);
 }
@@ -3203,9 +3238,11 @@ void cpu_bugs_smt_update(void)
 	switch (vmscape_mitigation) {
 	case VMSCAPE_MITIGATION_NONE:
 	case VMSCAPE_MITIGATION_AUTO:
+	case VMSCAPE_MITIGATION_ON:
 		break;
 	case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT:
 	case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER:
+	case VMSCAPE_MITIGATION_BHB_CLEAR_EXIT_TO_USER:
 		/*
 		 * Hypervisors can be attacked across-threads, warn for SMT when
 		 * STIBP is not already enabled system-wide.
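The switch above reduces to a small policy. A standalone model of
vmscape_select_mitigation()'s outcome for the AUTO/ON cases (names shortened;
this mirrors, rather than extends, the code above):

enum mitigation { NONE, IBPB_EXIT_TO_USER, BHB_CLEAR_EXIT_TO_USER };

/*
 * Prefer the cheaper BHB clearing when the CPU is only exposed to the BHI
 * variant (BHI_CTRL parts, 64-bit only); fall back to IBPB when available.
 */
static enum mitigation select_vmscape(int has_bhi_ctrl, int has_ibpb, int is_64bit)
{
	if (has_bhi_ctrl && is_64bit)
		return BHB_CLEAR_EXIT_TO_USER;
	if (has_ibpb)
		return IBPB_EXIT_TO_USER;
	return NONE;
}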
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63afdb6bb078..da03cb43a205 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11433,8 +11433,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 * set for the CPU that actually ran the guest, and not the CPU that it
 	 * may migrate to.
 	 */
-	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER))
-		this_cpu_write(x86_ibpb_exit_to_user, true);
+	if (static_call_query(vmscape_predictor_flush))
+		this_cpu_write(x86_predictor_flush_exit_to_user, true);
 
 	/*
 	 * Consume any pending interrupts, including the possible source of
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b0bac2a66eff..c31508be0d72 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1620,6 +1620,8 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
 		if (emit_call(&prog, func, ip))
 			return -EINVAL;
+		/* Don't speculate past this until BHB is cleared */
+		EMIT_LFENCE();
 		EMIT1(0x59); /* pop rcx */
 		EMIT1(0x58); /* pop rax */
 	}
-- 
2.52.0
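Taken together, the KVM and exit-to-userspace hunks implement a per-CPU
handoff: running a guest arms a flag, and the first exit to userspace consumes
it. A runnable userspace model with a plain function pointer standing in for
the static call (names mirror the patch; none of this is kernel code):

#include <stdbool.h>
#include <stdio.h>

static void clear_bhb_loop(void) { puts("flush: clear BHB"); }

/* stands in for the vmscape_predictor_flush static call; NULL = no mitigation */
static void (*predictor_flush)(void) = clear_bhb_loop;
/* stands in for the per-CPU x86_predictor_flush_exit_to_user flag */
static bool flush_pending;

static void vcpu_enter_guest(void)
{
	/* ran a potentially malicious guest: arm the flush for this CPU */
	if (predictor_flush)
		flush_pending = true;
}

static void arch_exit_to_user_mode_prepare(void)
{
	/* consume the flag once; later exits cost nothing until the next guest run */
	if (flush_pending) {
		predictor_flush();
		flush_pending = false;
	}
}

int main(void)
{
	vcpu_enter_guest();
	arch_exit_to_user_mode_prepare();	/* prints once */
	arch_exit_to_user_mode_prepare();	/* no output */
	return 0;
}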