diff --git a/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml b/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml new file mode 100644 index 00000000000000..db6b1e88180262 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml @@ -0,0 +1,237 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/apple,t8010-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Apple H9P/T8010 PCIe Host Controller + +maintainers: + - Hector Martin + +description: + Apple A10/T8010 devices use an older H9P PCIe root complex for the + internal storage path. It exposes one ECAM window shared by up to four + root ports, controller-specific PHY and port register windows, and a + controller-local MSI block. + +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + - $ref: /schemas/interrupt-controller/msi-controller.yaml# + +properties: + compatible: + const: apple,t8010-pcie + + reg: + minItems: 10 + maxItems: 10 + + reg-names: + items: + - const: config + - const: phy0 + - const: phy1 + - const: phy2 + - const: port0 + - const: port1 + - const: port2 + - const: port3 + - const: nvmmu0 + - const: pcieclk-postup + + interrupts: + description: + Four port state interrupts followed by 32 MSI interrupts and the + NVMMU fault interrupt for the active storage port (all 37 required). + minItems: 37 + maxItems: 37 + + clocks: + minItems: 3 + maxItems: 3 + + clock-names: + items: + - const: core + - const: aux + - const: ref + + power-domains: + minItems: 3 + maxItems: 3 + + power-domain-names: + items: + - const: core + - const: aux + - const: ref + + reset-gpios: + description: + PERST# GPIOs indexed by PCIe root port. + minItems: 1 + maxItems: 4 + + clkreq-gpios: + description: + CLKREQ# GPIOs indexed by PCIe root port. 
+ minItems: 1 + maxItems: 4 + + msi-controller: true + + msi-parent: true + + apple,msi-doorbell: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + MSI doorbell address programmed into downstream endpoints. + + apple,enabled-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Bitmask of root ports that should be powered and trained. + minimum: 1 + maximum: 15 + + apple,nvmmu-iova: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Base device-visible address for the reserved NVMMU/SART window. + + memory-region: + description: + Reserved physical window programmed into the H9P NVMMU/SART path. + maxItems: 1 + + interrupt-controller: true + + '#interrupt-cells': + const: 1 + +required: + - compatible + - reg + - reg-names + - interrupts + - clocks + - clock-names + - power-domains + - power-domain-names + - reset-gpios + - clkreq-gpios + - bus-range + - ranges + - msi-controller + - msi-parent + - apple,enabled-ports + - apple,nvmmu-iova + - memory-region + - '#interrupt-cells' + +unevaluatedProperties: false + +examples: + - | + #include + #include + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + pcie0_nvmmu_window: nvmmu-window@8bee00000 { + reg = <0x8 0xbee00000 0x0 0x01200000>; + no-map; + }; + }; + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pcie0: pcie@610000000 { + compatible = "apple,t8010-pcie"; + device_type = "pci"; + + reg = <0x6 0x10000000 0x0 0x1000000>, + <0x6 0x00000000 0x0 0x8000>, + <0x6 0x00008000 0x0 0x4000>, + <0x6 0x0a000000 0x0 0x40000>, + <0x6 0x01000000 0x0 0x4000>, + <0x6 0x02000000 0x0 0x4000>, + <0x6 0x03000000 0x0 0x4000>, + <0x6 0x04000000 0x0 0x4000>, + <0x6 0x01004000 0x0 0x4000>, + <0x6 0x00010000 0x0 0x8000>; + reg-names = "config", "phy0", "phy1", "phy2", + "port0", "port1", "port2", "port3", + "nvmmu0", "pcieclk-postup"; + + interrupt-parent = <&aic>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , 
+ , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + + msi-controller; + msi-parent = <&pcie0>; + apple,msi-doorbell = <0xbffff000>; + apple,enabled-ports = <0x1>; + apple,nvmmu-iova = <0xbc000000>; + memory-region = <&pcie0_nvmmu_window>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + bus-range = <0x00 0x0f>; + ranges = <0x03000000 0x0 0xc0000000 + 0x7 0xc0000000 0x0 0x40000000>; + + clocks = <&clkref>, <&clkref>, <&clkref>; + clock-names = "core", "aux", "ref"; + power-domains = <&ps_pcie>, <&ps_pcie_aux>, + <&ps_pcie_ref>; + power-domain-names = "core", "aux", "ref"; + + reset-gpios = <&pinctrl_ap 12 GPIO_ACTIVE_HIGH>; + clkreq-gpios = <&pinctrl_ap 16 GPIO_ACTIVE_HIGH>; + }; + }; + +... diff --git a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi index bd0e9c0b5696fa..882fec4e69b5f3 100644 --- a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi +++ b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi @@ -12,3 +12,120 @@ * now. 
*/ #include "t8010-ipad6.dtsi" + +/ { + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + pcie0_nvmmu_window: nvmmu-window@8bee00000 { + reg = <0x8 0xbee00000 0x0 0x01200000>; + no-map; + }; + }; + + soc { + pcie0_dart0: iommu@601008000 { + compatible = "apple,t8010-dart", "apple,s5l8960x-dart"; + reg = <0x6 0x01008000 0x0 0x4000>; + #iommu-cells = <1>; + interrupt-parent = <&aic>; + interrupts = ; + power-domains = <&ps_pcie>; + apple,dma-range = <0x0 0x80000000 0x0 0x3c000000>; + }; + + pcie0: pcie@610000000 { + compatible = "apple,t8010-pcie"; + device_type = "pci"; + status = "okay"; + + reg = <0x6 0x10000000 0x0 0x1000000>, + <0x6 0x00000000 0x0 0x8000>, + <0x6 0x00008000 0x0 0x4000>, + <0x6 0x0a000000 0x0 0x40000>, + <0x6 0x01000000 0x0 0x4000>, + <0x6 0x02000000 0x0 0x4000>, + <0x6 0x03000000 0x0 0x4000>, + <0x6 0x04000000 0x0 0x4000>, + <0x6 0x01004000 0x0 0x4000>, + <0x6 0x00010000 0x0 0x8000>; + reg-names = "config", "phy0", "phy1", "phy2", + "port0", "port1", "port2", "port3", + "nvmmu0", "pcieclk-postup"; + + interrupt-parent = <&aic>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + + msi-controller; + msi-parent = <&pcie0>; + apple,msi-doorbell = <0xbffff000>; + apple,enabled-ports = <0x1>; + apple,nvmmu-iova = <0xbc000000>; + memory-region = <&pcie0_nvmmu_window>; + + iommu-map = <0x100 &pcie0_dart0 0 1>; + iommu-map-mask = <0xff00>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-controller; + bus-range = <0x00 0x0f>; + ranges = <0x03000000 0x0 0xc0000000 + 0x7 0xc0000000 0x0 0x40000000>; + + clocks = <&clkref>, <&clkref>, <&clkref>; + clock-names = "core", "aux", "ref"; + power-domains = <&ps_pcie>, <&ps_pcie_aux>, + <&ps_pcie_ref>; + power-domain-names = "core", "aux", "ref"; + + reset-gpios = <&pinctrl_ap 12 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 13 GPIO_ACTIVE_HIGH>, + 
<&pinctrl_ap 14 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 15 GPIO_ACTIVE_HIGH>; + clkreq-gpios = <&pinctrl_ap 16 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 17 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 18 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 19 GPIO_ACTIVE_HIGH>; + }; + }; +}; diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 6414ec968f99ae..c07c3a77a1b7c4 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -19,7 +19,7 @@ nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o nvme-core-$(CONFIG_NVME_HOST_AUTH) += auth.o -nvme-y += pci.o +nvme-y += pci.o pci-apple-h9p.o nvme-fabrics-y += fabrics.o diff --git a/drivers/nvme/host/pci-apple-h9p.c b/drivers/nvme/host/pci-apple-h9p.c new file mode 100644 index 00000000000000..6edac48f84f4da --- /dev/null +++ b/drivers/nvme/host/pci-apple-h9p.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Apple H9P/T8010 PCI NVMe glue. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci-internal.h" +/* Set in cmd.common.flags by the map path, which then places a FlatDMA address in PRP1. */ +#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) +/* Controller registers as byte offsets from dev->bar, plus the values used with them. */ +#define APPLE_H9P_REG_INIT 0x1800 +#define APPLE_H9P_REG_INIT_REGULAR 0 +#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 +#define APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c +#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 +#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 +#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 +#define APPLE_H9P_REG_CORE_MASK 0x1824 +#define APPLE_H9P_REG_LOG_SIZE 0x1828 +#define APPLE_H9P_REG_BOOT_STATE 0x1b18 +#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu +#define APPLE_H9P_NVME_MAX_SECTORS \ + (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) /* sectors covered by one full page list */ +/* One FlatDMA request slot: the page list handed to apple_h9p_pcie_map_nvmmu(). */ +struct apple_h9p_nvme_req { + u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; + unsigned int npages; +}; +/* Per-controller H9P state, stored in nvme_dev::dma_data. */ +struct apple_h9p_nvme { + dma_addr_t scratch_dma; /* scratch base written to the controller, read from "apple,nvmmu-iova" */ + u32 scratch_size; /* size of the "memory-region" window; 0 until it has been looked up */ + struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; + DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); + 
unsigned int last_req; /* slot handed out most recently; the next search starts just after it */ + /* Protects the FlatDMA request-slot bitmap. */ + spinlock_t req_lock; +}; +/* Return the H9P state attached to @dev (dev->dma_data); callers in this file treat NULL as "no H9P state". */ +static struct apple_h9p_nvme *nvme_pci_apple_h9p(struct nvme_dev *dev) +{ + return dev->dma_data; +} +/* + * Allocate the zero-initialised per-controller state on memory node @node, + * initialise its slot lock and attach it to @dev. + * + * Returns 0 on success or -ENOMEM. + */ +static int nvme_pci_apple_h9p_init(struct nvme_dev *dev, int node) +{ + struct apple_h9p_nvme *h9p; + + h9p = kzalloc_node(sizeof(*h9p), GFP_KERNEL, node); + if (!h9p) + return -ENOMEM; + + spin_lock_init(&h9p->req_lock); + dev->dma_data = h9p; + return 0; +} +/* Free the per-controller state and clear dev->dma_data. */ +static void nvme_pci_apple_h9p_exit(struct nvme_dev *dev) +{ + kfree(dev->dma_data); + dev->dma_data = NULL; +} +/* + * Look up the scratch window described by the host bridge's device-tree node. + * + * The node is taken from the bridge's parent device, falling back to the + * bridge device itself. Its "memory-region" phandle names the reserved window + * and "apple,nvmmu-iova" supplies the address stored as the scratch DMA base. + * A size request of 0 or U32_MAX is rejected with -EINVAL; an alignment + * request of 0 or U32_MAX is treated as 1. The window must be at least + * @scratch_size_req bytes and no larger than U32_MAX (-ENOSPC otherwise), and + * both its start and the IOVA must pass IS_ALIGNED() for the alignment. + * + * Returns 0 with h9p->scratch_dma and h9p->scratch_size filled in, or a + * negative errno. + */ +static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, + u32 scratch_size_req, + u32 scratch_align_req) +{ + struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev); + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct pci_host_bridge *bridge; + struct device_node *pcie_np; + struct device_node *mem_np; + struct resource res; + resource_size_t size; + u32 iova; + int ret; + + bridge = pci_find_host_bridge(pdev->bus); + if (!bridge) + return -ENODEV; + + pcie_np = bridge->dev.parent ? 
bridge->dev.parent->of_node : NULL; + if (!pcie_np) + pcie_np = bridge->dev.of_node; + if (!pcie_np) + return -ENODEV; + + mem_np = of_parse_phandle(pcie_np, "memory-region", 0); + if (!mem_np) + return -ENODEV; + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_mem; + + ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_mem; + + if (!scratch_size_req || scratch_size_req == U32_MAX) { + ret = -EINVAL; + goto out_put_mem; + } + if (!scratch_align_req || scratch_align_req == U32_MAX) + scratch_align_req = 1; /* 0 or all-ones: no alignment constraint is applied */ + + size = resource_size(&res); + if (size < scratch_size_req || size > U32_MAX) { + ret = -ENOSPC; + goto out_put_mem; + } + if (!IS_ALIGNED(res.start, scratch_align_req) || + !IS_ALIGNED(iova, scratch_align_req)) { + ret = -EINVAL; + goto out_put_mem; + } + + h9p->scratch_dma = iova; + h9p->scratch_size = size; /* the whole window, not just the requested size */ + dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", + h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); + +out_put_mem: + of_node_put(mem_np); + return ret; +} +/* + * Early controller setup for H9P devices; returns 0 at once when no H9P state + * is attached. + * + * Writes the PCI latency timer and three config-space dwords (0x17c, 0x18c, + * 0x188; their meaning is not documented here), reads the controller's core + * mask, log size and scratch size/alignment requests, clears CC and polls + * CSTS every 1000 us for up to 2000000 us until RDY and CFS are both clear, + * then resolves the scratch window. + * + * Returns 0 on success, -EIO if a config write fails, or the error from the + * poll or from the scratch lookup. + */ +static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts, core_mask, log_size; + u32 scratch_size, scratch_align; + int ret; + + if (!nvme_pci_apple_h9p(dev)) + return 0; + + if (pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x17c, 0x10081008) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x188, 0x40550000) != + PCIBIOS_SUCCESSFUL) + return -EIO; + + if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == + APPLE_H9P_REG_BOOT_STATE_MAGIC) + dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); + + core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); + log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); + scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); + scratch_align = 
readl(dev->bar + APPLE_H9P_REG_SCRATCH_ALIGN_REQ); + + writel(0, dev->bar + NVME_REG_CC); + ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, + !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), + 1000, 2000000); + if (ret) + return ret; + + ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, + scratch_align); + if (ret) + return ret; + + dev_dbg(dev->dev, + "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", + core_mask, log_size, scratch_size, scratch_align); + return 0; +} +/* + * Program the scratch window into the controller: INIT is written with + * APPLE_H9P_REG_INIT_REGULAR, then the scratch base (low and high halves) and + * size are written and the size register is read back (presumably to flush the + * posted writes - confirm). + * + * Returns 0, also when no H9P state is attached, or -EINVAL when no scratch + * window has been recorded yet. + */ +static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) +{ + struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev); + + if (!h9p) + return 0; + if (!h9p->scratch_size) + return -EINVAL; + + writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); + writel(lower_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); + writel(upper_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); + writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + + return 0; +} +/* + * Claim a free FlatDMA request slot. + * + * The search starts just after the slot handed out last and wraps around to + * the start of the bitmap. On success *@req points at the slot, with its page + * list and page count cleared, and *@tag holds its index. BLK_STS_RESOURCE is + * returned when every slot is in use. + */ +static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req **req, + unsigned int *tag) +{ + struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev); + unsigned long flags; + unsigned int idx; + + spin_lock_irqsave(&h9p->req_lock, flags); + idx = find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS, + (h9p->last_req + 1) % + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) + idx = find_first_zero_bit(h9p->used_req, + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) { + spin_unlock_irqrestore(&h9p->req_lock, flags); + dev_dbg_ratelimited(dev->dev, + "Apple H9P NVMe FlatDMA slots exhausted\n"); + return BLK_STS_RESOURCE; + } + + h9p->last_req = idx; + __set_bit(idx, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); + + *req = &h9p->req[idx]; + *tag = idx; + (*req)->npages = 0; + memset((*req)->pages, 0, 
sizeof((*req)->pages));
+	return BLK_STS_OK;
+}
+
+/*
+ * Give a FlatDMA request slot back.
+ *
+ * The slot index is recovered from @req's position in h9p->req[].
+ * apple_h9p_pcie_map_nvmmu() is called for that tag with an empty page list
+ * (presumably clearing the NVMMU mapping - confirm against the PCIe driver;
+ * its return value is ignored here), then the slot is marked free under
+ * req_lock.  Missing H9P state, a NULL @req or an out-of-range slot is a
+ * no-op.
+ */
+static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev,
+					struct apple_h9p_nvme_req *req)
+{
+	struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev);
+	unsigned long flags;
+	unsigned int tag;
+
+	if (!h9p || !req)
+		return;
+
+	tag = req - h9p->req;
+	if (tag >= APPLE_H9P_NVMMU_MAX_REQS)
+		return;
+
+	apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL);
+	req->npages = 0;
+
+	spin_lock_irqsave(&h9p->req_lock, flags);
+	__clear_bit(tag, h9p->used_req);
+	spin_unlock_irqrestore(&h9p->req_lock, flags);
+}
+
+/*
+ * Undo nvme_pci_apple_h9p_map_data() for @req.
+ *
+ * Returns false without touching anything when the request holds no FlatDMA
+ * slot (iod->dma_private is NULL), so the caller can run its own unmap path.
+ * Otherwise every recorded DMA vector is unmapped, the vector array goes back
+ * to the mempool, the slot is released, the FlatDMA command flag is cleared
+ * and true is returned.  This is also the error path of the map function, so
+ * it copes with a partially built mapping.
+ */
+static bool nvme_pci_apple_h9p_unmap_data(struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
+	unsigned int i;
+
+	if (!iod->dma_private)
+		return false;
+
+	for (i = 0; i < iod->nr_dma_vecs; i++)
+		dma_unmap_page(dev->dev, iod->dma_vecs[i].addr,
+			       iod->dma_vecs[i].len, rq_dma_dir(req));
+	if (iod->dma_vecs) {
+		mempool_free(iod->dma_vecs, dev->dmavec_mempool);
+		iod->dma_vecs = NULL;
+	}
+	iod->nr_dma_vecs = 0;
+
+	nvme_pci_apple_h9p_free_req(dev, iod->dma_private);
+	iod->dma_private = NULL;
+	iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA;
+	return true;
+}
+
+/*
+ * Map one bio_vec of @req for FlatDMA.
+ *
+ * The vector is DMA-mapped and recorded in iod->dma_vecs so that
+ * nvme_pci_apple_h9p_unmap_data() can undo it, then its physical range is
+ * appended to @hreq->pages in APPLE_H9P_NVMMU_PAGE_SIZE steps.  Only the first
+ * vector may start inside a page (that offset is returned through @offs) and
+ * only the last one may end short of a page boundary.
+ *
+ * @npages and @consumed carry the running page and byte counts across calls;
+ * @total is the payload size of the whole request.
+ *
+ * Returns BLK_STS_OK, BLK_STS_RESOURCE if the DMA mapping fails, or
+ * BLK_STS_IOERR for a layout the page list cannot describe.  Nothing is
+ * unwound here; the caller does that through the unmap function.
+ */
+static blk_status_t nvme_pci_apple_h9p_map_bvec(struct request *req,
+						struct apple_h9p_nvme_req *hreq,
+						struct bio_vec *bv,
+						unsigned int total,
+						unsigned int *npages,
+						unsigned int *consumed,
+						u64 *offs)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
+	unsigned int len = bv->bv_len;
+	dma_addr_t dma_addr;
+	u64 phys;
+
+	if (WARN_ON_ONCE(iod->nr_dma_vecs >= blk_rq_nr_phys_segments(req)))
+		return BLK_STS_IOERR;
+
+	dma_addr = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, dma_addr))
+		return BLK_STS_RESOURCE;
+
+	iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr;
+	iod->dma_vecs[iod->nr_dma_vecs].len = len;
+	iod->nr_dma_vecs++;
+
+	phys = page_to_phys(bv->bv_page) + bv->bv_offset;
+	if (!*consumed) {
+		/* First vector: round down to the page and keep the offset. */
+		*offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1);
+		phys -= *offs;
+		len += *offs;
+	} else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) {
+		dev_err_ratelimited(dev->dev,
+				    "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n",
+				    phys);
+		return BLK_STS_IOERR;
+	}
+
+	/* Every vector but the last has to end on a page boundary. */
+	if (*consumed + bv->bv_len != total &&
+	    (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) {
+		dev_err_ratelimited(dev->dev,
+				    "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n",
+				    len);
+		return BLK_STS_IOERR;
+	}
+
+	while (len) {
+		if (*npages >= APPLE_H9P_NVMMU_MAX_PAGES)
+			return BLK_STS_IOERR;
+
+		hreq->pages[(*npages)++] = phys;
+		phys += APPLE_H9P_NVMMU_PAGE_SIZE;
+		len = len > APPLE_H9P_NVMMU_PAGE_SIZE ?
+			len - APPLE_H9P_NVMMU_PAGE_SIZE : 0;
+	}
+
+	*consumed += bv->bv_len;
+	return BLK_STS_OK;
+}
+
+/*
+ * Build a FlatDMA mapping for the data payload of @req.
+ *
+ * A request slot is claimed, every payload vector is DMA-mapped and turned
+ * into a page list, and the list is handed to apple_h9p_pcie_map_nvmmu().
+ * On success the command gets the FlatDMA flag, PRP1 is set to the returned
+ * address plus the offset of the payload within its first page, and PRP2 is
+ * cleared.
+ *
+ * Requests flagged RQF_SPECIAL_PAYLOAD (for example discard) keep their
+ * payload in req->special_vec and their bios carry no bio_vec table, so that
+ * single vector is mapped directly instead of walking the bios.
+ *
+ * Returns BLK_STS_NOTSUPP when no H9P state is attached (nvme_map_data() then
+ * falls back to its generic path), BLK_STS_IOERR when the payload exceeds the
+ * page list or has an unsupported layout, BLK_STS_RESOURCE when a slot, the
+ * vector array or a DMA mapping is unavailable, or the translated NVMMU error.
+ */
+static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
+	struct apple_h9p_nvme_req *hreq;
+	struct req_iterator iter;
+	struct bio_vec bv;
+	dma_addr_t flatdma;
+	u64 offs = 0;
+	unsigned int tag, npages = 0, consumed = 0;
+	unsigned int total = blk_rq_payload_bytes(req);
+	blk_status_t status;
+	int ret;
+
+	if (!nvme_pci_apple_h9p(dev))
+		return BLK_STS_NOTSUPP;
+	if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE)
+		return BLK_STS_IOERR;
+
+	status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag);
+	if (status)
+		return status;
+
+	/* From here on the unmap function owns the cleanup. */
+	iod->dma_private = hreq;
+	iod->dma_vecs = mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC);
+	if (!iod->dma_vecs) {
+		status = BLK_STS_RESOURCE;
+		goto out_unmap;
+	}
+
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		/* The bios of such a request must not be iterated. */
+		status = nvme_pci_apple_h9p_map_bvec(req, hreq,
+						     &req->special_vec, total,
+						     &npages, &consumed,
+						     &offs);
+		if (status)
+			goto out_unmap;
+	} else {
+		rq_for_each_bvec(bv, req, iter) {
+			status = nvme_pci_apple_h9p_map_bvec(req, hreq, &bv,
+							     total, &npages,
+							     &consumed, &offs);
+			if (status)
+				goto out_unmap;
+		}
+	}
+
+	ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages,
+				       &flatdma);
+	if (ret) {
+		status = errno_to_blk_status(ret);
+		/*
+		 * BLK_STS_NOTSUPP would make nvme_map_data() retry on its
+		 * generic path; report a plain I/O error instead.
+		 */
+		if (status == BLK_STS_NOTSUPP)
+			status = BLK_STS_IOERR;
+		goto out_unmap;
+	}
+
+	hreq->npages = npages;
+	iod->total_len = total;
+	iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA;
+	iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs);
+	iod->cmd.common.dptr.prp2 = 0;
+	return BLK_STS_OK;
+
+out_unmap:
+	nvme_pci_apple_h9p_unmap_data(req);
+	return status;
+}
+
+/*
+ * Return true when the controller carries NVME_QUIRK_SINGLE_VECTOR, MSI is
+ * enabled on @pdev and the admin queue is currently marked enabled.
+ */
+static bool nvme_pci_apple_h9p_reuse_admin_irq(struct nvme_dev *dev,
+					       struct pci_dev *pdev,
+					       struct nvme_queue *adminq)
+{
+	return (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) &&
+	       pdev->msi_enabled &&
+	       test_bit(NVMEQ_ENABLED, &adminq->flags);
+}
+
+/* Clamp @depth to the number of FlatDMA request slots. */
+static u32 nvme_pci_apple_h9p_queue_depth(struct nvme_dev *dev, u32 depth)
+{
+	return min_t(u32, depth, APPLE_H9P_NVMMU_MAX_REQS);
+}
+
+/* Clamp @max_hw_sectors to what one FlatDMA page list can cover. */
+static u32 nvme_pci_apple_h9p_max_hw_sectors(struct nvme_dev *dev,
+					     u32 max_hw_sectors)
+{
+	return min_t(u32, max_hw_sectors, APPLE_H9P_NVME_MAX_SECTORS);
+}
+
+/* Hook table exported to pci.c through pci-internal.h. */
+const struct nvme_pci_dma_ops nvme_pci_apple_h9p_ops = {
+	.quirks = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS,
+	.init = nvme_pci_apple_h9p_init,
+	.exit = nvme_pci_apple_h9p_exit,
+	.preinit = nvme_pci_apple_h9p_preinit,
+	.prepare_enable = nvme_pci_apple_h9p_prepare_enable,
+	.map_data = nvme_pci_apple_h9p_map_data,
+	.unmap_data = nvme_pci_apple_h9p_unmap_data,
+	.reuse_admin_irq = nvme_pci_apple_h9p_reuse_admin_irq,
+	.queue_depth = nvme_pci_apple_h9p_queue_depth,
+	.max_hw_sectors = nvme_pci_apple_h9p_max_hw_sectors,
+};
diff --git a/drivers/nvme/host/pci-internal.h b/drivers/nvme/host/pci-internal.h
new file mode 100644
index 00000000000000..c8175821d94c30
--- /dev/null
+++ b/drivers/nvme/host/pci-internal.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Private nvme-pci structures shared with controller-specific
glue. + */ + +#ifndef _NVME_PCI_INTERNAL_H +#define _NVME_PCI_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvme.h" + +#define PCI_DEVICE_ID_APPLE_H9P_NVME 0x2002 + +#define NVME_MAX_NR_DESCRIPTORS 5 + +struct nvme_dev; +struct nvme_queue; + +struct nvme_descriptor_pools { + struct dma_pool *large; + struct dma_pool *small; +}; +/* + * Controller-specific hooks, reached through nvme_dev::dma_ops. pci.c checks + * both the table pointer and the individual hook for NULL before calling. + * + * quirks: NVME_QUIRK_* bits supplied by the glue; where they are applied + * is not visible from this header. + * init, exit: in the H9P glue, allocate and free the state kept in + * nvme_dev::dma_data. + * preinit: early controller setup; 0 or a negative errno. + * prepare_enable: called by nvme_pci_configure_admin_queue() after ASQ and ACQ + * are written and before nvme_enable_ctrl(); a non-zero result is returned + * to the caller. + * map_data: tried first by nvme_map_data(). BLK_STS_NOTSUPP falls through to + * the generic mapping path, any other status is returned as is. + * unmap_data: tried first by nvme_unmap_data(). Returning true means the hook + * unmapped the request and the generic path is skipped. + * reuse_admin_irq: true or false decision on the admin queue interrupt; the + * call site is not visible here. + * queue_depth, max_hw_sectors: take the proposed value and return the value + * to use; the H9P glue clamps both. + */ +struct nvme_pci_dma_ops { + u32 quirks; + int (*init)(struct nvme_dev *dev, int node); + void (*exit)(struct nvme_dev *dev); + int (*preinit)(struct nvme_dev *dev); + int (*prepare_enable)(struct nvme_dev *dev); + blk_status_t (*map_data)(struct request *req); + bool (*unmap_data)(struct request *req); + bool (*reuse_admin_irq)(struct nvme_dev *dev, struct pci_dev *pdev, + struct nvme_queue *adminq); + u32 (*queue_depth)(struct nvme_dev *dev, u32 depth); + u32 (*max_hw_sectors)(struct nvme_dev *dev, u32 max_hw_sectors); +}; + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. 
+ */ +struct nvme_dev { + struct nvme_queue *queues; + struct blk_mq_tag_set tagset; + struct blk_mq_tag_set admin_tagset; + u32 __iomem *dbs; + struct device *dev; + unsigned int online_queues; + unsigned int max_qid; + unsigned int io_queues[HCTX_MAX_TYPES]; + unsigned int num_vecs; + u32 q_depth; + int io_sqes; + u32 db_stride; + void __iomem *bar; + unsigned long bar_mapped_size; + /* protects shutdown sequencing against reset and remove paths */ + struct mutex shutdown_lock; + bool subsystem; + u64 cmb_size; + bool cmb_use_sqes; + u32 cmbsz; + u32 cmbloc; + struct nvme_ctrl ctrl; + u32 last_ps; + bool hmb; + struct sg_table *hmb_sgt; + mempool_t *dmavec_mempool; + const struct nvme_pci_dma_ops *dma_ops; + void *dma_data; + + /* shadow doorbell buffer support: */ + __le32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + __le32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; + + /* host memory buffer support: */ + u64 host_mem_size; + u32 nr_host_mem_descs; + u32 host_mem_descs_size; + dma_addr_t host_mem_descs_dma; + struct nvme_host_mem_buf_desc *host_mem_descs; + void **host_mem_desc_bufs; + unsigned int nr_allocated_queues; + unsigned int nr_write_queues; + unsigned int nr_poll_queues; + struct nvme_descriptor_pools descriptor_pools[]; +}; + +/* + * An NVM Express queue. Each device has at least two (one for admin + * commands and one for I/O commands). 
+ */ +struct nvme_queue { + struct nvme_dev *dev; + struct nvme_descriptor_pools descriptor_pools; + /* protects SQ tail updates */ + spinlock_t sq_lock; + void *sq_cmds; + /* protects CQ polling state; only used for poll queues */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u32 q_depth; + u16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + __le32 *dbbuf_sq_db; + __le32 *dbbuf_cq_db; + __le32 *dbbuf_sq_ei; + __le32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static inline size_t nvme_pci_sq_size(const struct nvme_queue *q) +{ + return q->q_depth << q->sqes; +} + +static inline size_t nvme_pci_cq_size(const struct nvme_queue *q) +{ + return q->q_depth * sizeof(struct nvme_completion); +} + +/* bits for iod->flags */ +enum nvme_iod_flags { + /* this command has been aborted by the timeout handler */ + IOD_ABORTED = 1U << 0, + + /* uses the small descriptor pool */ + IOD_SMALL_DESCRIPTOR = 1U << 1, + + /* single segment dma mapping */ + IOD_SINGLE_SEGMENT = 1U << 2, + + /* Data payload contains p2p memory */ + IOD_DATA_P2P = 1U << 3, + + /* Metadata contains p2p memory */ + IOD_META_P2P = 1U << 4, + + /* Data payload contains MMIO memory */ + IOD_DATA_MMIO = 1U << 5, + + /* Metadata contains MMIO memory */ + IOD_META_MMIO = 1U << 6, + + /* Metadata using non-coalesced MPTR */ + IOD_SINGLE_META_SEGMENT = 1U << 7, +}; + +struct nvme_dma_vec { + dma_addr_t addr; + unsigned int len; +}; + +/* + * The nvme_iod describes the data in an I/O. 
+ */ +struct nvme_iod { + struct nvme_request req; + struct nvme_command cmd; + u8 flags; + u8 nr_descriptors; + + size_t total_len; + struct dma_iova_state dma_state; + void *descriptors[NVME_MAX_NR_DESCRIPTORS]; + struct nvme_dma_vec *dma_vecs; + unsigned int nr_dma_vecs; + + dma_addr_t meta_dma; + size_t meta_total_len; + struct dma_iova_state meta_dma_state; + struct nvme_sgl_desc *meta_descriptor; + void *dma_private; +}; + +extern const struct nvme_pci_dma_ops nvme_pci_apple_h9p_ops; + +#endif /* _NVME_PCI_INTERNAL_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b5f84620067899..7ff959e6ce8e0a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -30,9 +34,7 @@ #include "trace.h" #include "nvme.h" - -#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) -#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) +#include "pci-internal.h" /* Optimisation for I/Os between 4k and 128k */ #define NVME_SMALL_POOL_SIZE 256 @@ -41,7 +43,6 @@ * Arbitrary upper bound. */ #define NVME_MAX_BYTES SZ_8M -#define NVME_MAX_NR_DESCRIPTORS 5 /* * For data SGLs we support a single descriptors worth of SGL entries. @@ -276,67 +277,10 @@ static bool noacpi; module_param(noacpi, bool, 0444); MODULE_PARM_DESC(noacpi, "disable acpi bios quirks"); -struct nvme_dev; -struct nvme_queue; - static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_delete_io_queues(struct nvme_dev *dev); static void nvme_update_attrs(struct nvme_dev *dev); -struct nvme_descriptor_pools { - struct dma_pool *large; - struct dma_pool *small; -}; - -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function. 
- */ -struct nvme_dev { - struct nvme_queue *queues; - struct blk_mq_tag_set tagset; - struct blk_mq_tag_set admin_tagset; - u32 __iomem *dbs; - struct device *dev; - unsigned online_queues; - unsigned max_qid; - unsigned io_queues[HCTX_MAX_TYPES]; - unsigned int num_vecs; - u32 q_depth; - int io_sqes; - u32 db_stride; - void __iomem *bar; - unsigned long bar_mapped_size; - struct mutex shutdown_lock; - bool subsystem; - u64 cmb_size; - bool cmb_use_sqes; - u32 cmbsz; - u32 cmbloc; - struct nvme_ctrl ctrl; - u32 last_ps; - bool hmb; - struct sg_table *hmb_sgt; - mempool_t *dmavec_mempool; - - /* shadow doorbell buffer support: */ - __le32 *dbbuf_dbs; - dma_addr_t dbbuf_dbs_dma_addr; - __le32 *dbbuf_eis; - dma_addr_t dbbuf_eis_dma_addr; - - /* host memory buffer support: */ - u64 host_mem_size; - u32 nr_host_mem_descs; - u32 host_mem_descs_size; - dma_addr_t host_mem_descs_dma; - struct nvme_host_mem_buf_desc *host_mem_descs; - void **host_mem_desc_bufs; - unsigned int nr_allocated_queues; - unsigned int nr_write_queues; - unsigned int nr_poll_queues; - struct nvme_descriptor_pools descriptor_pools[]; -}; - static int io_queue_depth_set(const char *val, const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE, @@ -358,94 +302,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) return container_of(ctrl, struct nvme_dev, ctrl); } -/* - * An NVM Express queue. Each device has at least two (one for admin - * commands and one for I/O commands). 
- */ -struct nvme_queue { - struct nvme_dev *dev; - struct nvme_descriptor_pools descriptor_pools; - spinlock_t sq_lock; - void *sq_cmds; - /* only used for poll queues: */ - spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; - struct nvme_completion *cqes; - dma_addr_t sq_dma_addr; - dma_addr_t cq_dma_addr; - u32 __iomem *q_db; - u32 q_depth; - u16 cq_vector; - u16 sq_tail; - u16 last_sq_tail; - u16 cq_head; - u16 qid; - u8 cq_phase; - u8 sqes; - unsigned long flags; -#define NVMEQ_ENABLED 0 -#define NVMEQ_SQ_CMB 1 -#define NVMEQ_DELETE_ERROR 2 -#define NVMEQ_POLLED 3 - __le32 *dbbuf_sq_db; - __le32 *dbbuf_cq_db; - __le32 *dbbuf_sq_ei; - __le32 *dbbuf_cq_ei; - struct completion delete_done; -}; - -/* bits for iod->flags */ -enum nvme_iod_flags { - /* this command has been aborted by the timeout handler */ - IOD_ABORTED = 1U << 0, - - /* uses the small descriptor pool */ - IOD_SMALL_DESCRIPTOR = 1U << 1, - - /* single segment dma mapping */ - IOD_SINGLE_SEGMENT = 1U << 2, - - /* Data payload contains p2p memory */ - IOD_DATA_P2P = 1U << 3, - - /* Metadata contains p2p memory */ - IOD_META_P2P = 1U << 4, - - /* Data payload contains MMIO memory */ - IOD_DATA_MMIO = 1U << 5, - - /* Metadata contains MMIO memory */ - IOD_META_MMIO = 1U << 6, - - /* Metadata using non-coalesced MPTR */ - IOD_SINGLE_META_SEGMENT = 1U << 7, -}; - -struct nvme_dma_vec { - dma_addr_t addr; - unsigned int len; -}; - -/* - * The nvme_iod describes the data in an I/O. 
- */ -struct nvme_iod { - struct nvme_request req; - struct nvme_command cmd; - u8 flags; - u8 nr_descriptors; - - size_t total_len; - struct dma_iova_state dma_state; - void *descriptors[NVME_MAX_NR_DESCRIPTORS]; - struct nvme_dma_vec *dma_vecs; - unsigned int nr_dma_vecs; - - dma_addr_t meta_dma; - size_t meta_total_len; - struct dma_iova_state meta_dma_state; - struct nvme_sgl_desc *meta_descriptor; -}; - static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) { return dev->nr_allocated_queues * 8 * dev->db_stride; @@ -932,6 +788,10 @@ static void nvme_unmap_data(struct request *req) struct device *dma_dev = nvmeq->dev->dev; unsigned int attrs = 0; + if (nvmeq->dev->dma_ops && nvmeq->dev->dma_ops->unmap_data && + nvmeq->dev->dma_ops->unmap_data(req)) + return; + if (iod->flags & IOD_SINGLE_SEGMENT) { static_assert(offsetof(union nvme_data_ptr, prp1) == offsetof(union nvme_data_ptr, sgl.addr)); @@ -1246,6 +1106,12 @@ static blk_status_t nvme_map_data(struct request *req) struct blk_dma_iter iter; blk_status_t ret; + if (dev->dma_ops && dev->dma_ops->map_data) { + ret = dev->dma_ops->map_data(req); + if (ret != BLK_STS_NOTSUPP) + return ret; + } + /* * Try to skip the DMA iterator for single segment requests, as that * significantly improves performances for small I/O sizes. 
@@ -1401,6 +1267,8 @@ static blk_status_t nvme_prep_rq(struct request *req) iod->total_len = 0; iod->meta_total_len = 0; iod->nr_dma_vecs = 0; + iod->dma_vecs = NULL; + iod->dma_private = NULL; ret = nvme_setup_cmd(req->q->queuedata, req); if (ret) @@ -1450,6 +1318,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, ret = nvme_prep_rq(req); if (unlikely(ret)) return ret; + spin_lock(&nvmeq->sq_lock); nvme_sq_copy_cmd(nvmeq, &iod->cmd); nvme_write_sq_db(nvmeq, bd->last); @@ -2009,17 +1878,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) static void nvme_free_queue(struct nvme_queue *nvmeq) { - dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq), - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); + dma_free_coherent(nvmeq->dev->dev, nvme_pci_cq_size(nvmeq), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); if (!nvmeq->sq_cmds) return; if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), - nvmeq->sq_cmds, SQ_SIZE(nvmeq)); + nvmeq->sq_cmds, nvme_pci_sq_size(nvmeq)); } else { - dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); + dma_free_coherent(nvmeq->dev->dev, nvme_pci_sq_size(nvmeq), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); } } @@ -2106,7 +1975,8 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, struct pci_dev *pdev = to_pci_dev(dev->dev); if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { - nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq)); + nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, + nvme_pci_sq_size(nvmeq)); if (nvmeq->sq_cmds) { nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, nvmeq->sq_cmds); @@ -2115,12 +1985,13 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } - pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq)); + pci_free_p2pmem(pdev, nvmeq->sq_cmds, + nvme_pci_sq_size(nvmeq)); } } - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq), - 
&nvmeq->sq_dma_addr, GFP_KERNEL); + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, nvme_pci_sq_size(nvmeq), + &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) return -ENOMEM; return 0; @@ -2135,7 +2006,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES; nvmeq->q_depth = depth; - nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), + nvmeq->cqes = dma_alloc_coherent(dev->dev, nvme_pci_cq_size(nvmeq), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) goto free_nvmeq; @@ -2155,8 +2026,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) return 0; free_cqdma: - dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, - nvmeq->cq_dma_addr); + dma_free_coherent(dev->dev, nvme_pci_cq_size(nvmeq), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: return -ENOMEM; } @@ -2184,7 +2055,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; - memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq)); + memset((void *)nvmeq->cqes, 0, nvme_pci_cq_size(nvmeq)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; wmb(); /* ensure the first interrupt sees the initialization */ @@ -2386,6 +2257,12 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); + if (dev->dma_ops && dev->dma_ops->prepare_enable) { + result = dev->dma_ops->prepare_enable(dev); + if (result) + return result; + } + result = nvme_enable_ctrl(&dev->ctrl); if (result) return result; @@ -2942,6 +2819,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) unsigned int nr_io_queues; unsigned long size; int result; + bool reuse_single_vector = false; /* * Sample the module parameters once at reset time so that we have @@ -2986,7 +2864,11 @@ static int 
nvme_setup_io_queues(struct nvme_dev *dev) result = nvme_setup_io_queues_trylock(dev); if (result) return result; - if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + reuse_single_vector = dev->dma_ops && dev->dma_ops->reuse_admin_irq && + dev->dma_ops->reuse_admin_irq(dev, pdev, adminq); + + if (!reuse_single_vector && + test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) pci_free_irq(pdev, 0, adminq); if (dev->cmb_use_sqes) { @@ -3014,19 +2896,27 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) retry: /* Deregister the admin queue's interrupt */ - if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + if (!reuse_single_vector && + test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) pci_free_irq(pdev, 0, adminq); /* * If we enable msix early due to not intx, disable it again before * setting up the full range we need. */ - pci_free_irq_vectors(pdev); + if (reuse_single_vector) { + result = 1; + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; + dev->io_queues[HCTX_TYPE_POLL] = 0; + } else { + pci_free_irq_vectors(pdev); - result = nvme_setup_irqs(dev, nr_io_queues); - if (result <= 0) { - result = -EIO; - goto out_unlock; + result = nvme_setup_irqs(dev, nr_io_queues); + if (result <= 0) { + result = -EIO; + goto out_unlock; + } } dev->num_vecs = result; @@ -3039,10 +2929,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * path to scale better, even if the receive path is limited by the * number of interrupts. 
*/ - result = queue_request_irq(adminq); - if (result) - goto out_unlock; - set_bit(NVMEQ_ENABLED, &adminq->flags); + if (reuse_single_vector) { + result = 0; + } else { + result = queue_request_irq(adminq); + if (result) + goto out_unlock; + set_bit(NVMEQ_ENABLED, &adminq->flags); + } mutex_unlock(&dev->shutdown_lock); result = nvme_create_io_queues(dev); @@ -3249,8 +3143,16 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", dev->q_depth); } + if (dev->dma_ops && dev->dma_ops->queue_depth) + dev->q_depth = dev->dma_ops->queue_depth(dev, dev->q_depth); dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ + if (dev->dma_ops && dev->dma_ops->preinit) { + result = dev->dma_ops->preinit(dev); + if (result) + goto free_irq; + } + nvme_map_cmb(dev); pci_save_state(pdev); @@ -3373,6 +3275,8 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) nvme_free_tagset(dev); put_device(dev->dev); kfree(dev->queues); + if (dev->dma_ops && dev->dma_ops->exit) + dev->dma_ops->exit(dev); kfree(dev); } @@ -3707,6 +3611,15 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks |= qentry->enabled_quirks; quirks &= ~qentry->disabled_quirks; } + if (pdev->vendor == PCI_VENDOR_ID_APPLE && + pdev->device == PCI_DEVICE_ID_APPLE_H9P_NVME) + dev->dma_ops = &nvme_pci_apple_h9p_ops; + if (dev->dma_ops) { + quirks |= dev->dma_ops->quirks; + ret = dev->dma_ops->init(dev, node); + if (ret) + goto out_put_device; + } ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); if (ret) @@ -3726,6 +3639,10 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev->ctrl.max_hw_sectors = min_t(u32, NVME_MAX_BYTES >> SECTOR_SHIFT, dma_opt_mapping_size(&pdev->dev) >> 9); + if (dev->dma_ops && dev->dma_ops->max_hw_sectors) + dev->ctrl.max_hw_sectors = + dev->dma_ops->max_hw_sectors(dev, + dev->ctrl.max_hw_sectors); dev->ctrl.max_segments = NVME_MAX_SEGS; 
dev->ctrl.max_integrity_segments = 1; return dev; @@ -3733,6 +3650,8 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, out_put_device: put_device(dev->dev); kfree(dev->queues); + if (dev->dma_ops && dev->dma_ops->exit) + dev->dma_ops->exit(dev); out_free_dev: kfree(dev); return ERR_PTR(ret); @@ -4271,6 +4190,9 @@ static const struct pci_device_id nvme_id_table[] = { */ .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_QDEPTH_ONE }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_H9P_NVME), + .driver_data = NVME_QUIRK_SINGLE_VECTOR | + NVME_QUIRK_SHARED_TAGS }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 2247709ef6d696..0b9e77af5aac39 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -54,6 +54,19 @@ config PCIE_APPLE If unsure, say Y if you have an Apple Silicon system. +config PCIE_APPLE_H9P + tristate "Apple H9P/T8010 PCIe controller" + depends on ARCH_APPLE || COMPILE_TEST + depends on OF + depends on PCI_MSI + select PCI_HOST_COMMON + help + Say Y here to enable the PCIe root complex found in Apple A10 + (T8010/H9P) devices. This controller is used for the internal + Apple NVMe storage path on devices such as iPad7,12. + It provides the legacy PHY/link setup and NVMMU/SART mapping + hooks needed before the standard PCI and NVMe layers can bind. 
+ config PCI_VERSATILE bool "ARM Versatile PB PCI controller" depends on ARCH_VERSATILE || COMPILE_TEST diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index ac8db283f0fea7..a4b489bd4f473f 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o obj-$(CONFIG_PCIE_HISI_ERR) += pcie-hisi-error.o obj-$(CONFIG_PCIE_APPLE) += pcie-apple.o +obj-$(CONFIG_PCIE_APPLE_H9P) += pcie-apple-h9p.o obj-$(CONFIG_PCIE_MT7621) += pcie-mt7621.o obj-$(CONFIG_PCIE_ASPEED) += pcie-aspeed.o diff --git a/drivers/pci/controller/pcie-apple-h9p.c b/drivers/pci/controller/pcie-apple-h9p.c new file mode 100644 index 00000000000000..2b0c2bb9a3b22a --- /dev/null +++ b/drivers/pci/controller/pcie-apple-h9p.c @@ -0,0 +1,1347 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe host bridge driver for Apple H9P/T8010 SoCs. + * + * The controller exposes an ECAM-compatible root complex after the SoC-specific + * power, clock and PHY sequence has brought a port out of reset. The hardware + * differs enough from the Apple Silicon PCIe controller to keep the early H9P + * bring-up sequence separate, while still using the generic PCI host bridge + * and MSI subsystems. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pci-host-common.h" + +#define H9P_NUM_PORTS 4 +#define H9P_NUM_MSI 32 +#define H9P_MSI_PER_PORT (H9P_NUM_MSI / H9P_NUM_PORTS) + +#define H9P_CFG_PORT_STRIDE 0x8000 +#define H9P_CFG_PORT_MISC 0x08e0 + +#define H9P_PHY0_COMMON_CTL0 0x0004 +#define H9P_PHY0_COMMON_CTL1 0x0014 +#define H9P_PHY0_COMMON_CTL2 0x0024 +#define H9P_PHY0_COMMON_CTL3 0x0034 +#define H9P_PHY0_COMMON_CTL_ENABLE BIT(0) +#define H9P_PHY0_COMMON_CTL_INIT BIT(4) +#define H9P_PHY0_PORT_STRIDE 0x0080 +#define H9P_PHY0_PORTSTAT(port) (0x0100 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_PORT_CTL0(port) (0x0100 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_PORT_CTL1(port) (0x0124 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_PORT_CTL2(port) (0x0134 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_COMMON_STAT 0x0028 +#define H9P_PHY0_COMMON_STAT_INIT_DONE BIT(4) +#define H9P_PHY0_COMMON_STAT_READY BIT(0) +#define H9P_PHY0_PORT_LINK_RATE(port) (0x4020 + (port) * 0x0040) +#define H9P_PHY1_PORTMASK 0x000c + +#define H9P_PHY2_EQ_COMMON0 0x0180 +#define H9P_PHY2_EQ_COMMON1 0x0184 +#define H9P_PHY2_EQ_TIME0 0x0090 +#define H9P_PHY2_EQ_TIME1 0x0098 +#define H9P_PHY2_PORT_STRIDE 0x0800 +#define H9P_PHY2_PORT(port, reg) ((reg) + (port) * H9P_PHY2_PORT_STRIDE) +#define H9P_PHY2_PORT_EQ_CTL 0x10088 +#define H9P_PHY2_PORT_IDLE 0x10784 +#define H9P_PHY2_PORT_EQ_PRESET 0x10004 +#define H9P_PHY2_PORT_RX_CTL0 0x20788 +#define H9P_PHY2_PORT_RX_CTL1 0x207a0 +#define H9P_PHY2_PORT_RX_CTL2 0x207a8 +#define H9P_PHY2_PORT_RX_CTL3 0x20400 +#define H9P_PHY2_PORT_TIMER0 0x2009c +#define H9P_PHY2_PORT_TIMER1 0x200dc +#define H9P_PHY2_PORT_TIMER2 0x200a0 +#define H9P_PHY2_PORT_TIMER3 0x200e0 +#define H9P_PHY2_PORT_TIMER4 0x200a4 +#define H9P_PHY2_PORT_TIMER5 0x200e4 
+#define H9P_PHY2_PORT_CLEAR0 0x20330 +#define H9P_PHY2_PORT_CLEAR1 0x20340 +#define H9P_PHY2_PORT_CLEAR2 0x20350 + +#define H9P_PORT_LTSSMCTL 0x0080 +#define H9P_PORT_LTSSM_ENABLE BIT(0) +#define H9P_PORT_IRQSTAT 0x0100 +#define H9P_PORT_IRQMASK 0x0104 +#define H9P_PORT_IRQMASK_PRE_LINK 0xff002fff +#define H9P_PORT_IRQSTAT_PRE_LINK 0x00ffd000 +#define H9P_PORT_IRQMASK_LINK_UP 0xff002f0f +#define H9P_PORT_PWRCTL 0x0124 +#define H9P_PORT_PWRCTL_INIT 0x31 +#define H9P_PORT_MSIVECBASE 0x0128 +#define H9P_PORT_ENABLE 0x0140 +#define H9P_PORT_ENABLE_APPLE BIT(31) +#define H9P_PORT_LINKSTS 0x0208 +#define H9P_PORT_LINKSTS_LTSSM GENMASK(13, 8) +#define H9P_PORT_LTSSM_DETECT 0x11 +#define H9P_PORT_LTSSM_L0 0x14 + +#define H9P_LINK_SPEED_2_5GT 1 +#define H9P_LINK_SPEED_8GT 3 + +#define H9P_PCIECLK_POSTUP0 0x0000 +#define H9P_PCIECLK_POSTUP1 0x000c +#define H9P_PCIECLK_POSTUP2 0x4104 +#define H9P_PCIECLK_POSTUP3 0x4100 +#define H9P_PCIECLK_POSTUP0_VALUE 0x00000007 +#define H9P_PCIECLK_POSTUP1_VALUE 0x80010005 +#define H9P_PCIECLK_POSTUP2_VALUE 0x00000003 +#define H9P_PCIECLK_POSTUP3_VALUE 0x00000003 + +#define H9P_NVMMU_TCB_CTRL 0x0004 +#define H9P_NVMMU_TCB_BASE_LO 0x0008 +#define H9P_NVMMU_TCB_BASE_HI 0x000c +#define H9P_NVMMU_TCB_TABLE_LO 0x0010 +#define H9P_NVMMU_TCB_TABLE_HI 0x0014 +#define H9P_NVMMU_SART_CTRL 0x0020 +#define H9P_NVMMU_SART_VA_BASE 0x0024 +#define H9P_NVMMU_SART_VA_END 0x0028 +#define H9P_NVMMU_SART_PA_BASE 0x002c + +#define H9P_NVMMU_TCB_BYTES 0x80 +#define H9P_NVMMU_TCB_DWORDS (H9P_NVMMU_TCB_BYTES / sizeof(u32)) +#define H9P_NVMMU_SGL_WORDS APPLE_H9P_NVMMU_MAX_PAGES +#define H9P_NVMMU_FLATDMA_BASE 0x40000000ULL +#define H9P_NVMMU_FLATDMA_STRIDE SZ_8M +#define H9P_NVMMU_SART_ALIGNMENT SZ_1M +#define H9P_NVMMU_TCB_READ 0x100 +#define H9P_NVMMU_TCB_WRITE 0x200 + +#define H9P_DEFAULT_MSI_DOORBELL 0xbffff000ULL + +struct apple_h9p_tunable { + u32 offset; + u32 size; + u64 mask; + u64 data; +}; + +static const struct apple_h9p_tunable h9p_phy0_tunables[] = 
{ + { 0x0008, 4, 0x7f7f7f7f, 0x00000000 }, + { 0x000c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0010, 4, 0x00000700, 0x00000000 }, + { 0x0018, 4, 0x00ffffff, 0x000c0960 }, + { 0x001c, 4, 0x00001fff, 0x0000092c }, + { 0x002c, 4, 0x000000ff, 0x00000009 }, + { 0x003c, 4, 0x80000000, 0x00000000 }, + { 0x0100, 4, 0x31100010, 0x01000000 }, + { 0x0108, 4, 0x00000707, 0x00000000 }, + { 0x010c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0110, 4, 0x00000011, 0x00000001 }, + { 0x0114, 4, 0x00000007, 0x00000000 }, + { 0x0118, 4, 0x00073f3f, 0x00043f00 }, + { 0x0120, 4, 0x0333003f, 0x0111000f }, + { 0x0130, 4, 0x000000ff, 0x0000000f }, + { 0x0138, 4, 0x0000007f, 0x0000003e }, + { 0x0180, 4, 0x31100010, 0x01000000 }, + { 0x0188, 4, 0x00000707, 0x00000000 }, + { 0x018c, 4, 0x00073f3f, 0x00043f00 }, + { 0x01a0, 4, 0x0333003f, 0x0111000f }, + { 0x01b0, 4, 0x000000ff, 0x0000000f }, + { 0x01b8, 4, 0x0000007f, 0x0000003e }, + { 0x0200, 4, 0x31100010, 0x01000000 }, + { 0x0208, 4, 0x00000707, 0x00000000 }, + { 0x020c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0220, 4, 0x0333003f, 0x0111000f }, + { 0x0230, 4, 0x000000ff, 0x0000000f }, + { 0x0238, 4, 0x0000007f, 0x0000003e }, + { 0x0280, 4, 0x31100010, 0x01000000 }, + { 0x0288, 4, 0x00000707, 0x00000000 }, + { 0x028c, 4, 0x00073f3f, 0x00043f00 }, + { 0x02a0, 4, 0x0333003f, 0x0111000f }, + { 0x02b0, 4, 0x000000ff, 0x0000000f }, + { 0x02b8, 4, 0x0000007f, 0x0000003e }, + { 0x0100, 4, 0x00000010, 0x00000010 }, + { 0x0180, 4, 0x00000010, 0x00000000 }, + { 0x0200, 4, 0x00000010, 0x00000000 }, + { 0x0280, 4, 0x00000010, 0x00000000 }, +}; + +static const struct apple_h9p_tunable h9p_config_tunables[] = { + { 0x0098, 4, 0x0000000f, 0x00000000 }, + { 0x0164, 4, 0x00f8ff00, 0x00000000 }, + { 0x08e0, 4, 0x00000005, 0x00000005 }, +}; + +static const struct apple_h9p_tunable h9p_port_tunables[] = { + { 0x0090, 4, 0x000000ff, 0x00000028 }, + { 0x0130, 4, 0x0000000d, 0x00000005 }, + { 0x0134, 4, 0x00000001, 0x00000001 }, + { 0x0138, 4, 0x00007f7f, 0x00000000 }, + { 0x013c, 
4, 0x00000002, 0x00000002 }, + { 0x0140, 4, 0x0073ffff, 0x00704c4b }, +}; + +struct apple_h9p_pcie { + struct device *dev; + struct platform_device *pdev; + struct pci_host_bridge *bridge; + struct pci_config_window *cfgwin; + + void __iomem *base_config; + void __iomem *base_phy[3]; + void __iomem *base_port[H9P_NUM_PORTS]; + void __iomem *base_pcieclk_postup; + + struct clk_bulk_data clks[3]; + struct gpio_desc *perst[H9P_NUM_PORTS]; + struct gpio_desc *clkreq[H9P_NUM_PORTS]; + struct gpio_descs *devpwr; + struct pinctrl *pinctrl; + u32 enabled_ports; + + struct apple_h9p_nvmmu { + struct apple_h9p_pcie *pcie; + void __iomem *base; + u64 pa_base; + u32 va_base; + u32 size; + void *tcb; + void *tcb_table; + void *tcb_sgl; + size_t tcb_size; + size_t tcb_table_size; + size_t tcb_sgl_size; + dma_addr_t tcb_dma; + dma_addr_t tcb_table_dma; + dma_addr_t tcb_sgl_dma; + } nvmmu[H9P_NUM_PORTS]; + + struct device **pd_dev; + struct device_link **pd_link; + int pd_count; + + DECLARE_BITMAP(used_msi[H9P_NUM_PORTS], H9P_MSI_PER_PORT); + u64 msi_doorbell; + /* Protects the per-port MSI allocation bitmaps. 
*/ + spinlock_t used_msi_lock; + struct irq_domain *irq_dom; + struct irq_domain *msi_dom; + + struct apple_h9p_msi { + struct apple_h9p_pcie *pcie; + int virq; + bool disabled; + } msi[H9P_NUM_MSI]; +}; + +static inline void h9p_rmw(void __iomem *addr, u32 clear, u32 set) +{ + writel((readl(addr) & ~clear) | set, addr); +} + +static inline void h9p_rmww(void __iomem *addr, u16 clear, u16 set) +{ + writew((readw(addr) & ~clear) | set, addr); +} + +static inline u64 h9p_readsz(void __iomem *addr, u32 size) +{ + switch (size) { + case 1: + return readb(addr); + case 2: + return readw(addr); + case 4: + return readl(addr); + case 8: + return readq(addr); + default: + return 0; + } +} + +static inline void h9p_writesz(u64 value, void __iomem *addr, u32 size) +{ + switch (size) { + case 1: + writeb(value, addr); + break; + case 2: + writew(value, addr); + break; + case 4: + writel(value, addr); + break; + case 8: + writeq(value, addr); + break; + } +} + +static inline void h9p_writel_flush(u32 value, void __iomem *addr) +{ + writel(value, addr); + readl(addr); +} + +static void apple_h9p_pcie_detach_genpd(struct apple_h9p_pcie *pcie) +{ + int i; + + for (i = pcie->pd_count - 1; i >= 0; i--) { + if (pcie->pd_link[i]) + device_link_del(pcie->pd_link[i]); + if (!IS_ERR_OR_NULL(pcie->pd_dev[i])) + dev_pm_domain_detach(pcie->pd_dev[i], true); + } +} + +static int apple_h9p_pcie_attach_genpd(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + int i; + + pcie->pd_count = of_count_phandle_with_args(dev->of_node, + "power-domains", + "#power-domain-cells"); + if (pcie->pd_count <= 1) + return 0; + + pcie->pd_dev = devm_kcalloc(dev, pcie->pd_count, + sizeof(*pcie->pd_dev), GFP_KERNEL); + if (!pcie->pd_dev) + return -ENOMEM; + + pcie->pd_link = devm_kcalloc(dev, pcie->pd_count, + sizeof(*pcie->pd_link), GFP_KERNEL); + if (!pcie->pd_link) + return -ENOMEM; + + for (i = 0; i < pcie->pd_count; i++) { + pcie->pd_dev[i] = dev_pm_domain_attach_by_id(dev, i); + if 
(IS_ERR(pcie->pd_dev[i])) { + apple_h9p_pcie_detach_genpd(pcie); + return PTR_ERR(pcie->pd_dev[i]); + } + + pcie->pd_link[i] = device_link_add(dev, pcie->pd_dev[i], + DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | + DL_FLAG_RPM_ACTIVE); + if (!pcie->pd_link[i]) { + apple_h9p_pcie_detach_genpd(pcie); + return -EINVAL; + } + } + + return 0; +} + +static void apple_h9p_pcie_genpd_cleanup(void *data) +{ + apple_h9p_pcie_detach_genpd(data); +} + +static void apple_h9p_pcie_clk_cleanup(void *data) +{ + struct apple_h9p_pcie *pcie = data; + + clk_bulk_disable_unprepare(ARRAY_SIZE(pcie->clks), pcie->clks); +} + +static struct apple_h9p_pcie *apple_h9p_pcie_lookup(struct device *dev) +{ + struct pci_host_bridge *bridge = dev_get_drvdata(dev); + + return bridge ? pci_host_bridge_priv(bridge) : NULL; +} + +static int apple_h9p_pcie_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct pci_config_window *cfg = bus->sysdata; + + if (bus->number == cfg->busr.start && PCI_SLOT(devfn) >= H9P_NUM_PORTS) + return PCIBIOS_DEVICE_NOT_FOUND; + + return pci_generic_config_read(bus, devfn, where, size, val); +} + +static int apple_h9p_pcie_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct pci_config_window *cfg = bus->sysdata; + + if (bus->number == cfg->busr.start && PCI_SLOT(devfn) >= H9P_NUM_PORTS) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (where <= PCI_INTERRUPT_LINE && where + size > PCI_INTERRUPT_LINE) + val |= 0xffu << ((PCI_INTERRUPT_LINE - where) << 3); + + return pci_generic_config_write(bus, devfn, where, size, val); +} + +static unsigned int apple_h9p_pcie_bus_to_port(struct apple_h9p_pcie *pcie, + unsigned int bus) +{ + unsigned int port; + + for (port = 0; port < H9P_NUM_PORTS; port++) { + u32 cfg, sec, sub; + + cfg = readl(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + PCI_PRIMARY_BUS); + sec = (cfg >> 8) & 0xff; + sub = (cfg >> 16) & 0xff; + + if (!sec || !sub || sec == 0xff || sub 
== 0xff) + continue; + if (bus >= sec && bus <= sub) + return port; + } + + return H9P_NUM_PORTS; +} + +static int apple_h9p_pcie_device_port(struct apple_h9p_pcie *pcie, + struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return -ENODEV; + + pdev = to_pci_dev(dev); + if (!pdev->bus) + return -ENODEV; + + return apple_h9p_pcie_bus_to_port(pcie, pdev->bus->number); +} + +static void apple_h9p_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie) { + memset(msg, 0, sizeof(*msg)); + return; + } + + msg->address_lo = lower_32_bits(pcie->msi_doorbell); + msg->address_hi = upper_32_bits(pcie->msi_doorbell); + msg->data = d->hwirq; +} + +static void apple_h9p_msi_write_msg(struct irq_data *d, struct msi_msg *msg) +{ + pci_write_msi_msg(d->irq, msg); +} + +static int apple_h9p_msi_set_affinity(struct irq_data *d, + const struct cpumask *mask, bool force) +{ + return -EINVAL; +} + +static void apple_h9p_msi_mask(struct irq_data *d) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie || d->hwirq >= H9P_NUM_MSI || pcie->msi[d->hwirq].virq <= 0) + return; + + if (!pcie->msi[d->hwirq].disabled) { + disable_irq_nosync(pcie->msi[d->hwirq].virq); + pcie->msi[d->hwirq].disabled = true; + } +} + +static void apple_h9p_msi_unmask(struct irq_data *d) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie || d->hwirq >= H9P_NUM_MSI || pcie->msi[d->hwirq].virq <= 0) + return; + + if (pcie->msi[d->hwirq].disabled) { + enable_irq(pcie->msi[d->hwirq].virq); + pcie->msi[d->hwirq].disabled = false; + } +} + +static void apple_h9p_msi_ack(struct irq_data *d) +{ +} + +static struct irq_chip apple_h9p_msi_chip = { + .name = "Apple H9P PCIe MSI", + .irq_ack = apple_h9p_msi_ack, + .irq_mask = apple_h9p_msi_mask, + .irq_unmask = apple_h9p_msi_unmask, + .irq_compose_msi_msg = apple_h9p_msi_compose_msg, + .irq_write_msi_msg = 
apple_h9p_msi_write_msg, + .irq_set_affinity = apple_h9p_msi_set_affinity, +}; + +static void apple_h9p_msi_isr(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct apple_h9p_msi *msi = irq_desc_get_handler_data(desc); + struct apple_h9p_pcie *pcie = msi->pcie; + unsigned int idx = msi - pcie->msi; + unsigned int virq; + + chained_irq_enter(chip, desc); + virq = irq_find_mapping(pcie->irq_dom, idx); + if (virq) + generic_handle_irq(virq); + chained_irq_exit(chip, desc); +} + +static int apple_h9p_msi_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct apple_h9p_pcie *pcie = domain->host_data; + msi_alloc_info_t *info = args; + struct msi_desc *desc = info ? info->desc : NULL; + struct pci_dev *pdev = NULL; + unsigned long flags; + unsigned int bus = 0; + unsigned int port; + int slot; + + if (nr_irqs != 1) + return -ENOSPC; + + if (desc && desc->dev && dev_is_pci(desc->dev)) { + pdev = to_pci_dev(desc->dev); + if (pdev->bus) + bus = pdev->bus->number; + } + + if (bus < 1) + return -ENOSPC; + + port = apple_h9p_pcie_bus_to_port(pcie, bus); + if (port >= H9P_NUM_PORTS) + return -ENOSPC; + if (!(pcie->enabled_ports & BIT(port))) + return -ENOSPC; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + slot = find_first_zero_bit(pcie->used_msi[port], H9P_MSI_PER_PORT); + if (slot >= H9P_MSI_PER_PORT) { + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + return -ENOSPC; + } + __set_bit(slot, pcie->used_msi[port]); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + + irq_domain_set_info(domain, virq, port * H9P_MSI_PER_PORT + slot, + &apple_h9p_msi_chip, pcie, handle_edge_irq, + NULL, NULL); + return 0; +} + +static void apple_h9p_msi_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct apple_h9p_pcie *pcie = d ? 
irq_data_get_irq_chip_data(d) : NULL; + unsigned long flags; + unsigned int i; + + if (!pcie || !d) + return; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + for (i = 0; i < nr_irqs; i++) { + unsigned long hwirq = d->hwirq + i; + unsigned int port = hwirq / H9P_MSI_PER_PORT; + unsigned int slot = hwirq % H9P_MSI_PER_PORT; + + if (port < H9P_NUM_PORTS) + __clear_bit(slot, pcie->used_msi[port]); + } + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); +} + +static const struct irq_domain_ops apple_h9p_msi_domain_ops = { + .alloc = apple_h9p_msi_alloc, + .free = apple_h9p_msi_free, +}; + +static struct irq_chip apple_h9p_msi_parent_chip = { + .name = "Apple H9P PCIe MSI parent", + .irq_ack = irq_chip_ack_parent, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_write_msi_msg = apple_h9p_msi_write_msg, +}; + +static struct msi_domain_info apple_h9p_msi_domain_info = { + .flags = MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX | + MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSI_MASK_PARENT, + .chip = &apple_h9p_msi_parent_chip, +}; + +static void apple_h9p_pcie_msi_cleanup(void *data) +{ + struct apple_h9p_pcie *pcie = data; + unsigned int i; + + for (i = 0; i < H9P_NUM_MSI; i++) { + if (pcie->msi[i].virq <= 0) + continue; + + irq_set_chained_handler_and_data(pcie->msi[i].virq, NULL, + NULL); + if (pcie->msi[i].disabled) { + enable_irq(pcie->msi[i].virq); + pcie->msi[i].disabled = false; + } + } + + if (pcie->msi_dom) { + irq_domain_remove(pcie->msi_dom); + pcie->msi_dom = NULL; + } + + if (pcie->irq_dom) { + irq_domain_remove(pcie->irq_dom); + pcie->irq_dom = NULL; + } +} + +static int apple_h9p_pcie_setup_msi(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + struct fwnode_handle *fwnode = dev_fwnode(dev); + unsigned int i; + int ret; + + pcie->irq_dom = irq_domain_create_linear(fwnode, H9P_NUM_MSI, + &apple_h9p_msi_domain_ops, + pcie); + if (!pcie->irq_dom) + return -ENOMEM; + + 
pcie->msi_dom = msi_create_irq_domain(fwnode, + &apple_h9p_msi_domain_info, + pcie->irq_dom); + if (!pcie->msi_dom) { + irq_domain_remove(pcie->irq_dom); + pcie->irq_dom = NULL; + return -ENOMEM; + } + + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_msi_cleanup, + pcie); + if (ret) + return ret; + + for (i = 0; i < H9P_NUM_MSI; i++) { + int irq = platform_get_irq(pcie->pdev, H9P_NUM_PORTS + i); + + if (irq < 0) + return irq; + + pcie->msi[i].pcie = pcie; + pcie->msi[i].virq = irq; + irq_set_chained_handler_and_data(irq, apple_h9p_msi_isr, + &pcie->msi[i]); + disable_irq(irq); + pcie->msi[i].disabled = true; + } + + return 0; +} + +static u64 apple_h9p_read_pci_cap(struct apple_h9p_pcie *pcie, + unsigned int busdevfn, u32 type) +{ + void __iomem *cfg = pcie->base_config + (busdevfn << 12); + u32 ptr = readl(cfg + PCI_CAPABILITY_LIST) & 0xfc; + + while (ptr) { + u32 next = readl(cfg + ptr); + /* Capability pointer bits [1:0] are reserved: mask them off. */ + if ((next & 0xff) == type) + return ptr; + ptr = (next >> 8) & 0xfc; + } + + return 0; +} + +static int apple_h9p_wait(void __iomem *addr, u32 mask, u32 min, u32 max, + unsigned long timeout_us) +{ + u32 val; + + return readl_poll_timeout(addr, val, (val & mask) >= min && + (val & mask) <= max, 1000, timeout_us); +} + +static int apple_h9p_wait_gpio(struct gpio_desc *desc, int value, + unsigned long timeout_us) +{ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); + + do { + if (gpiod_get_raw_value(desc) == value) + return 0; + usleep_range(1000, 2000); + } while (ktime_before(ktime_get(), timeout)); + /* Sample once more: the last sleep may have overrun the deadline. */ + return gpiod_get_raw_value(desc) == value ? 0 : -ETIMEDOUT; +} + +static irqreturn_t apple_h9p_nvmmu_irq(int irq, void *data) +{ + struct apple_h9p_nvmmu *nvmmu = data; + struct apple_h9p_pcie *pcie = nvmmu->pcie; + unsigned int port = nvmmu - pcie->nvmmu; + + dev_err_ratelimited(pcie->dev, "port %u NVMMU fault interrupt\n", port); + return IRQ_HANDLED; +} + +static int apple_h9p_setup_nvmmu_port(struct apple_h9p_pcie *pcie, + unsigned int port) +{ + struct apple_h9p_nvmmu *nvmmu = &pcie->nvmmu[port]; + 
struct device *dev = pcie->dev; + struct device_node *mem_np; + struct resource res; + u32 iova; + int irq; + int ret; + + if (!nvmmu->base) + return 0; + + mem_np = of_parse_phandle(dev->of_node, "memory-region", port); + if (!mem_np) + return dev_err_probe(dev, -EINVAL, + "port %u NVMMU missing memory-region\n", + port); + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_node; + + ret = of_property_read_u32(dev->of_node, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_node; + + if (resource_size(&res) < H9P_NVMMU_SART_ALIGNMENT || + !IS_ALIGNED(res.start, H9P_NVMMU_SART_ALIGNMENT) || + !IS_ALIGNED(iova, H9P_NVMMU_SART_ALIGNMENT)) { + ret = -EINVAL; + goto out_put_node; + } + + nvmmu->pcie = pcie; + nvmmu->pa_base = res.start; + nvmmu->va_base = iova; + nvmmu->size = resource_size(&res); + nvmmu->tcb_size = round_up(APPLE_H9P_NVMMU_MAX_REQS * + H9P_NVMMU_TCB_BYTES, PAGE_SIZE); + nvmmu->tcb_table_size = PAGE_SIZE * 16; + nvmmu->tcb_sgl_size = round_up(APPLE_H9P_NVMMU_MAX_REQS * + H9P_NVMMU_SGL_WORDS * sizeof(u32), + PAGE_SIZE); + + nvmmu->tcb = dmam_alloc_attrs(dev, nvmmu->tcb_size, &nvmmu->tcb_dma, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + if (!nvmmu->tcb) { + ret = -ENOMEM; + goto out_put_node; + } + + nvmmu->tcb_table = dmam_alloc_attrs(dev, nvmmu->tcb_table_size, + &nvmmu->tcb_table_dma, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + if (!nvmmu->tcb_table) { + ret = -ENOMEM; + goto out_put_node; + } + + nvmmu->tcb_sgl = dmam_alloc_attrs(dev, nvmmu->tcb_sgl_size, + &nvmmu->tcb_sgl_dma, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + if (!nvmmu->tcb_sgl) { + ret = -ENOMEM; + goto out_put_node; + } + + h9p_writel_flush(lower_32_bits(nvmmu->tcb_dma), + nvmmu->base + H9P_NVMMU_TCB_BASE_LO); + h9p_writel_flush(upper_32_bits(nvmmu->tcb_dma), + nvmmu->base + H9P_NVMMU_TCB_BASE_HI); + h9p_writel_flush(lower_32_bits(nvmmu->tcb_table_dma), + nvmmu->base + H9P_NVMMU_TCB_TABLE_LO); + 
h9p_writel_flush(upper_32_bits(nvmmu->tcb_table_dma), + nvmmu->base + H9P_NVMMU_TCB_TABLE_HI); + h9p_writel_flush(0x10000, nvmmu->base + H9P_NVMMU_TCB_CTRL); + + ret = apple_h9p_wait(nvmmu->base + H9P_NVMMU_TCB_CTRL, 0x10, 0, 0, + 250000); + if (ret) + goto out_put_node; + + h9p_writel_flush(nvmmu->va_base - 0x80000000U, + nvmmu->base + H9P_NVMMU_SART_VA_BASE); + h9p_writel_flush(round_up(nvmmu->va_base + nvmmu->size, + H9P_NVMMU_SART_ALIGNMENT) - 0x80100000U, + nvmmu->base + H9P_NVMMU_SART_VA_END); + h9p_writel_flush(nvmmu->pa_base >> 20, + nvmmu->base + H9P_NVMMU_SART_PA_BASE); + h9p_writel_flush(1, nvmmu->base + H9P_NVMMU_SART_CTRL); + + irq = platform_get_irq_optional(pcie->pdev, H9P_NUM_PORTS + + H9P_NUM_MSI + port); + if (irq > 0) { + ret = devm_request_irq(dev, irq, apple_h9p_nvmmu_irq, 0, + dev_name(dev), nvmmu); + if (ret) + goto out_put_node; + } else if (irq != -ENXIO) { + ret = irq; + goto out_put_node; + } + + dev_dbg(dev, "port %u NVMMU window %#x@%pa size %#x\n", port, + nvmmu->va_base, &res.start, nvmmu->size); + +out_put_node: + of_node_put(mem_np); + return ret; +} + +static int apple_h9p_setup_nvmmu(struct apple_h9p_pcie *pcie) +{ + unsigned int port; + int ret; + + for (port = 0; port < H9P_NUM_PORTS; port++) { + if (!(pcie->enabled_ports & BIT(port))) + continue; + + ret = apple_h9p_setup_nvmmu_port(pcie, port); + if (ret) + return dev_err_probe(pcie->dev, ret, + "port %u NVMMU setup failed\n", + port); + } + + return 0; +} + +int apple_h9p_pcie_map_nvmmu(struct device *dev, unsigned int tag, + const u64 *pages, unsigned int npages, + dma_addr_t *iova) +{ + struct apple_h9p_nvmmu *nvmmu; + struct apple_h9p_pcie *pcie; + struct device *host_dev = dev; + unsigned int port; + unsigned int i; + u64 sgl_dma; + u32 *tcb; + u32 *sgl; + int ret; + + if (tag >= APPLE_H9P_NVMMU_MAX_REQS || + npages > APPLE_H9P_NVMMU_MAX_PAGES) + return -EINVAL; + if (npages && !pages) + return -EINVAL; + + while (host_dev && host_dev->bus == dev->bus) + host_dev = 
host_dev->parent; + if (!host_dev || !host_dev->parent) + return -ENODEV; + + pcie = apple_h9p_pcie_lookup(host_dev->parent); + if (!pcie) + return -ENODEV; + + ret = apple_h9p_pcie_device_port(pcie, dev); + if (ret < 0) + return ret; + port = ret; + if (port >= H9P_NUM_PORTS || !(pcie->enabled_ports & BIT(port))) + return -ENODEV; + + nvmmu = &pcie->nvmmu[port]; + if (!nvmmu->base || !nvmmu->tcb || !nvmmu->tcb_sgl) + return -EOPNOTSUPP; + + tcb = (u32 *)nvmmu->tcb + tag * H9P_NVMMU_TCB_DWORDS; + sgl = (u32 *)nvmmu->tcb_sgl + tag * H9P_NVMMU_SGL_WORDS; + memset(tcb, 0, H9P_NVMMU_TCB_BYTES); + memset(sgl, 0, H9P_NVMMU_SGL_WORDS * sizeof(*sgl)); + + if (npages) { + tcb[0] = H9P_NVMMU_TCB_READ | H9P_NVMMU_TCB_WRITE; + tcb[1] = npages; + tcb[2] = pages[0] >> ilog2(APPLE_H9P_NVMMU_PAGE_SIZE); + for (i = 0; i < npages; i++) + sgl[i] = pages[i] >> ilog2(APPLE_H9P_NVMMU_PAGE_SIZE); + + sgl_dma = nvmmu->tcb_sgl_dma + + tag * H9P_NVMMU_SGL_WORDS * sizeof(*sgl); + memcpy(&tcb[4], &sgl_dma, sizeof(sgl_dma)); + if (iova) + *iova = H9P_NVMMU_FLATDMA_BASE + + tag * H9P_NVMMU_FLATDMA_STRIDE; + } else { + dma_wmb(); + h9p_writel_flush(tag, nvmmu->base + H9P_NVMMU_TCB_CTRL); + if (iova) + *iova = 0; + return 0; + } + + dma_wmb(); + return 0; +} +EXPORT_SYMBOL_GPL(apple_h9p_pcie_map_nvmmu); + +static void apple_h9p_apply_tunables(void __iomem *base, + const struct apple_h9p_tunable *tunables, + unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + u64 val = h9p_readsz(base + tunables[i].offset, tunables[i].size); + + if ((val & tunables[i].mask) == tunables[i].data) + continue; + val &= ~tunables[i].mask; + val |= tunables[i].data; + h9p_writesz(val, base + tunables[i].offset, tunables[i].size); + } +} + +static int apple_h9p_pcieclk_postup(struct apple_h9p_pcie *pcie) +{ + if (!pcie->base_pcieclk_postup) + return 0; + + writel(H9P_PCIECLK_POSTUP0_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP0); + writel(H9P_PCIECLK_POSTUP1_VALUE, + 
pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP1); + writel(H9P_PCIECLK_POSTUP2_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP2); + writel(H9P_PCIECLK_POSTUP3_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP3); + + return 0; +} + +static bool apple_h9p_link_up(struct apple_h9p_pcie *pcie, unsigned int port) +{ + u32 linksts = readl(pcie->base_port[port] + H9P_PORT_LINKSTS); + + linksts = FIELD_GET(H9P_PORT_LINKSTS_LTSSM, linksts); + return linksts >= H9P_PORT_LTSSM_DETECT && linksts <= H9P_PORT_LTSSM_L0; +} + +static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port) +{ + struct device *dev = pcie->dev; + u64 cap; + int ret; + + if (apple_h9p_link_up(pcie, port)) + return 0; + + gpiod_direction_output(pcie->perst[port], 0); + + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL2(port), 1, 0); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL1(port), 0, 1); + + ret = apple_h9p_wait(pcie->base_phy[0] + H9P_PHY0_COMMON_STAT, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, 250000); + if (ret) + return dev_err_probe(dev, ret, "port %u init timeout\n", port); + + usleep_range(250, 1000); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL0(port), 0, 1); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL0(port), 0x100, 0); + usleep_range(500, 1000); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL2(port), 0, 1); + + writel(port ? 0 : H9P_LINK_SPEED_8GT, + pcie->base_phy[0] + H9P_PHY0_PORT_LINK_RATE(port)); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL1(port), 0x100, 0); + + cap = apple_h9p_read_pci_cap(pcie, port << 3, PCI_CAP_ID_EXP); + if (cap) + h9p_rmww(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + cap + PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, + port ? 
H9P_LINK_SPEED_2_5GT : H9P_LINK_SPEED_8GT); + + apple_h9p_apply_tunables(pcie->base_config + port * H9P_CFG_PORT_STRIDE, + h9p_config_tunables, + ARRAY_SIZE(h9p_config_tunables)); + apple_h9p_apply_tunables(pcie->base_port[port], h9p_port_tunables, + ARRAY_SIZE(h9p_port_tunables)); + + h9p_rmw(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + H9P_CFG_PORT_MISC, 0, 1); + + writel(H9P_PORT_IRQMASK_PRE_LINK, + pcie->base_port[port] + H9P_PORT_IRQMASK); + writel(H9P_PORT_IRQSTAT_PRE_LINK, + pcie->base_port[port] + H9P_PORT_IRQSTAT); + + h9p_rmw(pcie->base_port[port] + H9P_PORT_ENABLE, 0, + H9P_PORT_ENABLE_APPLE); + writel(H9P_PORT_PWRCTL_INIT, pcie->base_port[port] + H9P_PORT_PWRCTL); + writel(port * 0x10001 * H9P_MSI_PER_PORT, + pcie->base_port[port] + H9P_PORT_MSIVECBASE); + + usleep_range(250, 1000); + ret = apple_h9p_wait_gpio(pcie->clkreq[port], 0, 250000); + if (ret) + return dev_err_probe(dev, ret, "port %u CLKREQ# timeout\n", + port); + + gpiod_direction_output(pcie->perst[port], 1); + usleep_range(250, 1000); + + ret = apple_h9p_wait(pcie->base_phy[1] + H9P_PHY1_PORTMASK, + BIT(port), BIT(port), BIT(port), 250000); + if (ret) + return dev_err_probe(dev, ret, "port %u PHY up timeout\n", + port); + + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_COMMON0, 0, 0x4000); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_COMMON1, 0, 0x4000); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_TIME0, 0xfff, 100); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_TIME1, 0xfff, 25); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_EQ_CTL), + 0, 0x4000); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_IDLE)); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_EQ_PRESET), 0xfff, 0x600); + writel(0x3105, pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL0)); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL1), 0xff, 0x9f); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL2), 0xff, 0x01); + 
h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL3), 0x1f, 0x0a); + writel(175, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER0)); + writel(175, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER1)); + writel(333, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER2)); + writel(333, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER3)); + writel(530, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER4)); + writel(530, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER5)); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_CLEAR0)); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_CLEAR1)); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_CLEAR2)); + + writel(H9P_PORT_IRQMASK_LINK_UP, + pcie->base_port[port] + H9P_PORT_IRQMASK); + usleep_range(5000, 10000); + + h9p_rmw(pcie->base_port[port] + H9P_PORT_LTSSMCTL, 0, + H9P_PORT_LTSSM_ENABLE); + ret = apple_h9p_wait(pcie->base_port[port] + H9P_PORT_LINKSTS, + H9P_PORT_LINKSTS_LTSSM, + FIELD_PREP(H9P_PORT_LINKSTS_LTSSM, + H9P_PORT_LTSSM_DETECT), + FIELD_PREP(H9P_PORT_LINKSTS_LTSSM, + H9P_PORT_LTSSM_L0), + 500000); + if (ret) + dev_warn(dev, "port %u link did not reach L0\n", port); + + return 0; +} + +static int apple_h9p_setup_ports(struct apple_h9p_pcie *pcie) +{ + unsigned int port; + int ret; + + writel(H9P_PHY0_COMMON_CTL_INIT, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL0); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL1(0), 0, + H9P_PHY0_COMMON_CTL_ENABLE); + + ret = apple_h9p_wait(pcie->base_phy[0] + H9P_PHY0_COMMON_STAT, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, 250000); + if (ret) + return dev_err_probe(pcie->dev, ret, + "global PHY init timeout\n"); + + ret = apple_h9p_wait(pcie->base_phy[0] + H9P_PHY0_COMMON_STAT, + H9P_PHY0_COMMON_STAT_READY, + H9P_PHY0_COMMON_STAT_READY, + H9P_PHY0_COMMON_STAT_READY, 250000); + if (ret) + 
return dev_err_probe(pcie->dev, ret, + "global PHY ready timeout\n"); + + writel(H9P_PHY0_COMMON_CTL_ENABLE, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL3); + apple_h9p_apply_tunables(pcie->base_phy[0], h9p_phy0_tunables, + ARRAY_SIZE(h9p_phy0_tunables)); + writel(H9P_PHY0_COMMON_CTL_ENABLE, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL1); + usleep_range(5000, 10000); + writel(H9P_PHY0_COMMON_CTL_ENABLE, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL2); + usleep_range(500, 1000); + + for (port = 0; port < H9P_NUM_PORTS; port++) { + if (!(pcie->enabled_ports & BIT(port))) + continue; + + ret = apple_h9p_setup_port(pcie, port); + if (ret) + return ret; + } + + return 0; +} + +static int apple_h9p_pcie_init(struct pci_config_window *cfg) +{ + struct apple_h9p_pcie *pcie = apple_h9p_pcie_lookup(cfg->parent); + int ret; + + if (!pcie) + return -ENODEV; + + pcie->cfgwin = cfg; + pcie->base_config = cfg->win; + + ret = apple_h9p_pcieclk_postup(pcie); + if (ret) + return ret; + + ret = apple_h9p_setup_ports(pcie); + if (ret) + return ret; + + ret = apple_h9p_setup_nvmmu(pcie); + return ret; +} + +static const struct pci_ecam_ops apple_h9p_pcie_ecam_ops = { + .bus_shift = 20, + .init = apple_h9p_pcie_init, + .pci_ops = { + .map_bus = pci_ecam_map_bus, + .read = apple_h9p_pcie_config_read, + .write = apple_h9p_pcie_config_write, + }, +}; + +static int apple_h9p_pcie_map_resources(struct platform_device *pdev, + struct apple_h9p_pcie *pcie) +{ + struct device *dev = &pdev->dev; + unsigned int i; + + for (i = 0; i < 3; i++) { + char name[8]; + + snprintf(name, sizeof(name), "phy%u", i); + pcie->base_phy[i] = devm_platform_ioremap_resource_byname(pdev, + name); + if (IS_ERR(pcie->base_phy[i])) + return PTR_ERR(pcie->base_phy[i]); + } + + for (i = 0; i < H9P_NUM_PORTS; i++) { + char name[8]; + struct resource *res; + + snprintf(name, sizeof(name), "port%u", i); + pcie->base_port[i] = devm_platform_ioremap_resource_byname(pdev, + name); + if (IS_ERR(pcie->base_port[i])) + return 
PTR_ERR(pcie->base_port[i]); + + snprintf(name, sizeof(name), "nvmmu%u", i); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (res) { + pcie->nvmmu[i].base = devm_ioremap_resource(dev, res); + if (IS_ERR(pcie->nvmmu[i].base)) + return PTR_ERR(pcie->nvmmu[i].base); + } + } + + pcie->base_pcieclk_postup = + devm_platform_ioremap_resource_byname(pdev, "pcieclk-postup"); + if (IS_ERR(pcie->base_pcieclk_postup)) { + if (PTR_ERR(pcie->base_pcieclk_postup) == -EINVAL) + pcie->base_pcieclk_postup = NULL; + else + return dev_err_probe(dev, + PTR_ERR(pcie->base_pcieclk_postup), + "failed to map pcieclk post-up\n"); + } + + return 0; +} + +static int apple_h9p_pcie_get_gpios(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + unsigned int i; + + for (i = 0; i < H9P_NUM_PORTS; i++) { + if (!(pcie->enabled_ports & BIT(i))) + continue; + + pcie->perst[i] = devm_gpiod_get_index(dev, "reset", i, + GPIOD_OUT_LOW); + if (IS_ERR(pcie->perst[i])) + return dev_err_probe(dev, PTR_ERR(pcie->perst[i]), + "failed to get PERST#%u\n", i); + + pcie->clkreq[i] = devm_gpiod_get_index(dev, "clkreq", i, + GPIOD_IN); + if (IS_ERR(pcie->clkreq[i])) + return dev_err_probe(dev, PTR_ERR(pcie->clkreq[i]), + "failed to get CLKREQ#%u\n", i); + } + + pcie->devpwr = devm_gpiod_get_array_optional(dev, "devpwr", GPIOD_ASIS); + if (IS_ERR(pcie->devpwr)) + return dev_err_probe(dev, PTR_ERR(pcie->devpwr), + "failed to get device power GPIOs\n"); + + return 0; +} + +static int apple_h9p_pcie_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pci_host_bridge *bridge; + struct apple_h9p_pcie *pcie; + int ret; + + bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie)); + if (!bridge) + return -ENOMEM; + + pcie = pci_host_bridge_priv(bridge); + pcie->dev = dev; + pcie->pdev = pdev; + pcie->bridge = bridge; + spin_lock_init(&pcie->used_msi_lock); + + ret = of_property_read_u32(dev->of_node, "apple,enabled-ports", + &pcie->enabled_ports); + if 
(ret) + pcie->enabled_ports = BIT(0); + pcie->enabled_ports &= GENMASK(H9P_NUM_PORTS - 1, 0); + if (!pcie->enabled_ports) + return dev_err_probe(dev, -EINVAL, "no enabled ports\n"); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) + return dev_err_probe(dev, ret, "failed to set DMA mask\n"); + + ret = apple_h9p_pcie_attach_genpd(pcie); + if (ret) + return dev_err_probe(dev, ret, "failed to attach power domains\n"); + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_genpd_cleanup, pcie); + if (ret) + return ret; + + pcie->clks[0].id = "core"; + pcie->clks[1].id = "aux"; + pcie->clks[2].id = "ref"; + ret = devm_clk_bulk_get(dev, ARRAY_SIZE(pcie->clks), pcie->clks); + if (ret) + return dev_err_probe(dev, ret, "failed to get clocks\n"); + + ret = clk_bulk_prepare_enable(ARRAY_SIZE(pcie->clks), pcie->clks); + if (ret) + return dev_err_probe(dev, ret, "failed to enable clocks\n"); + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_clk_cleanup, pcie); + if (ret) + return ret; + + pcie->pinctrl = devm_pinctrl_get_select_default(dev); + if (PTR_ERR(pcie->pinctrl) == -ENODEV) + pcie->pinctrl = NULL; + else if (IS_ERR(pcie->pinctrl)) + return dev_err_probe(dev, PTR_ERR(pcie->pinctrl), + "failed to select pinctrl state\n"); + + ret = apple_h9p_pcie_map_resources(pdev, pcie); + if (ret) + return ret; + + ret = apple_h9p_pcie_get_gpios(pcie); + if (ret) + return ret; + + ret = of_property_read_u64(dev->of_node, "apple,msi-doorbell", + &pcie->msi_doorbell); + if (ret) + pcie->msi_doorbell = H9P_DEFAULT_MSI_DOORBELL; + + ret = apple_h9p_pcie_setup_msi(pcie); + if (ret) + return dev_err_probe(dev, ret, "failed to set up MSI\n"); + + return pci_host_common_init(pdev, bridge, &apple_h9p_pcie_ecam_ops); +} + +static const struct of_device_id apple_h9p_pcie_of_match[] = { + { .compatible = "apple,t8010-pcie" }, + { } +}; +MODULE_DEVICE_TABLE(of, apple_h9p_pcie_of_match); + +static struct platform_driver apple_h9p_pcie_driver = { + .probe = 
apple_h9p_pcie_probe, + .driver = { + .name = "pcie-apple-h9p", + .of_match_table = apple_h9p_pcie_of_match, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(apple_h9p_pcie_driver); + +MODULE_DESCRIPTION("Apple H9P/T8010 PCIe host bridge driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/apple-h9p-pcie.h b/include/linux/apple-h9p-pcie.h new file mode 100644 index 00000000000000..c29219c281b208 --- /dev/null +++ b/include/linux/apple-h9p-pcie.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_APPLE_H9P_PCIE_H +#define _LINUX_APPLE_H9P_PCIE_H + +#include +#include +#include + +struct device; + +#define APPLE_H9P_NVMMU_MAX_REQS 36 +#define APPLE_H9P_NVMMU_MAX_PAGES 256 +#define APPLE_H9P_NVMMU_PAGE_SIZE 4096 + +#if IS_REACHABLE(CONFIG_PCIE_APPLE_H9P) +int apple_h9p_pcie_map_nvmmu(struct device *dev, unsigned int tag, + const u64 *pages, unsigned int npages, + dma_addr_t *iova); +#else +static inline int apple_h9p_pcie_map_nvmmu(struct device *dev, + unsigned int tag, const u64 *pages, + unsigned int npages, dma_addr_t *iova) +{ + return -EOPNOTSUPP; +} +#endif + +#endif /* _LINUX_APPLE_H9P_PCIE_H */