Kernel: Implement NVMe driver

I'm actually able to boot this OS fine on my own laptop now!
Bananymous 2024-01-14 01:39:48 +02:00
parent 812e9efd41
commit 54a92293da
11 changed files with 956 additions and 2 deletions


@@ -65,6 +65,9 @@ set(KERNEL_SOURCES
kernel/Storage/ATA/ATAController.cpp
kernel/Storage/ATA/ATADevice.cpp
kernel/Storage/DiskCache.cpp
kernel/Storage/NVMe/Controller.cpp
kernel/Storage/NVMe/Namespace.cpp
kernel/Storage/NVMe/Queue.cpp
kernel/Storage/Partition.cpp
kernel/Storage/StorageDevice.cpp
kernel/Syscall.cpp


@@ -11,6 +11,7 @@
#endif
#define PAGE_SIZE ((uintptr_t)4096)
#define PAGE_SIZE_SHIFT 12
#define PAGE_ADDR_MASK (~(uintptr_t)0xFFF)
namespace Kernel


@@ -0,0 +1,52 @@
#pragma once
#include <BAN/Vector.h>
#include <kernel/InterruptController.h>
#include <kernel/PCI.h>
#include <kernel/Storage/NVMe/Definitions.h>
#include <kernel/Storage/NVMe/Namespace.h>
#include <kernel/Storage/NVMe/Queue.h>
namespace Kernel
{
class NVMeController final : public StorageController, public CharacterDevice
{
BAN_NON_COPYABLE(NVMeController);
BAN_NON_MOVABLE(NVMeController);
public:
static BAN::ErrorOr<BAN::RefPtr<StorageController>> create(PCI::Device&);
~NVMeController() { ASSERT_NOT_REACHED(); }
NVMeQueue& io_queue() { return *m_io_queue; }
virtual dev_t rdev() const override { return m_rdev; }
virtual BAN::StringView name() const override { return m_name; }
private:
NVMeController(PCI::Device& pci_device);
virtual BAN::ErrorOr<void> initialize() override;
BAN::ErrorOr<void> identify_controller();
BAN::ErrorOr<void> identify_namespaces();
BAN::ErrorOr<void> wait_until_ready(bool expected_value);
BAN::ErrorOr<void> create_admin_queue();
BAN::ErrorOr<void> create_io_queue();
private:
PCI::Device& m_pci_device;
BAN::UniqPtr<PCI::BarRegion> m_bar0;
volatile NVMe::ControllerRegisters* m_controller_registers;
BAN::UniqPtr<NVMeQueue> m_admin_queue;
BAN::UniqPtr<NVMeQueue> m_io_queue;
BAN::Vector<BAN::RefPtr<NVMeNamespace>> m_namespaces;
char m_name[20];
const dev_t m_rdev;
};
}


@@ -0,0 +1,295 @@
#pragma once
#include <stdint.h>
namespace Kernel::NVMe
{
struct CAP
{
uint64_t mqes : 16;
uint64_t cqr : 1;
uint64_t ams : 2;
uint64_t __reserved0 : 5;
uint64_t to : 8;
uint64_t dstrd : 4;
uint64_t nssrs : 1;
uint64_t css : 8;
uint64_t bps : 1;
uint64_t cps : 2;
uint64_t mpsmin : 4;
uint64_t mpsmax : 4;
uint64_t pmrs : 1;
uint64_t cmpbs : 1;
uint64_t nsss : 1;
uint64_t crms : 2;
uint64_t __reserved1 : 3;
};
static_assert(sizeof(CAP) == sizeof(uint64_t));
enum CAP_CSS
{
CAP_CSS_NVME = 1 << 0,
CAP_CSS_IO = 1 << 6,
CAP_CSS_ADMIN = 1 << 7,
};
struct VS
{
uint32_t tertiary : 8;
uint32_t minor : 8;
uint32_t major : 16;
};
static_assert(sizeof(VS) == sizeof(uint32_t));
struct CC
{
uint32_t en : 1;
uint32_t __reserved0 : 3;
uint32_t css : 3;
uint32_t mps : 4;
uint32_t ams : 3;
uint32_t shn : 2;
uint32_t iosqes : 4;
uint32_t iocqes : 4;
uint32_t crime : 1;
uint32_t __reserved1 : 7;
};
static_assert(sizeof(CC) == sizeof(uint32_t));
struct CSTS
{
uint32_t rdy : 1;
uint32_t cfs : 1;
uint32_t shts : 2;
uint32_t nssro : 1;
uint32_t pp : 1;
uint32_t st : 1;
uint32_t __reserved : 25;
};
static_assert(sizeof(CSTS) == sizeof(uint32_t));
struct AQA
{
uint32_t asqs : 12;
uint32_t __reserved0 : 4;
uint32_t acqs : 12;
uint32_t __reserved1 : 4;
};
static_assert(sizeof(AQA) == sizeof(uint32_t));
// BAR0
struct ControllerRegisters
{
CAP cap;
VS vs;
uint32_t intms;
uint32_t intmc;
CC cc;
uint8_t __reserved0[4];
CSTS csts;
uint32_t nssr;
AQA aqa;
uint64_t asq;
uint64_t acq;
static constexpr uint32_t SQ0TDBL = 0x1000;
};
static_assert(sizeof(ControllerRegisters) == 0x38);
struct DoorbellRegisters
{
uint32_t sq_tail;
uint32_t cq_head;
} __attribute__((packed));
struct CompletionQueueEntry
{
uint32_t dontcare[3];
uint16_t cid;
uint16_t sts;
} __attribute__((packed));
static_assert(sizeof(CompletionQueueEntry) == 16);
struct DataPtr
{
union
{
struct
{
uint64_t prp1;
uint64_t prp2;
};
uint8_t sgl1[16];
};
};
struct CommandGeneric
{
uint32_t nsid;
uint32_t cdw2;
uint32_t cdw3;
uint64_t mptr;
DataPtr dptr;
uint32_t cdw10;
uint32_t cdw11;
uint32_t cdw12;
uint32_t cdw13;
uint32_t cdw14;
uint32_t cdw15;
} __attribute__((packed));
static_assert(sizeof(CommandGeneric) == 15 * sizeof(uint32_t));
struct CommandIdentify
{
uint32_t nsid;
uint64_t __reserved0[2];
DataPtr dptr;
// dword 10
uint8_t cns;
uint8_t __reserved1;
uint16_t cntid;
// dword 11
uint16_t cnsid;
uint8_t __reserved2;
uint8_t csi;
// dword 12-15
uint32_t __reserved3[4];
} __attribute__((packed));
static_assert(sizeof(CommandIdentify) == 15 * sizeof(uint32_t));
struct CommandCreateCQ
{
uint32_t __reserved0;
uint64_t __reserved1[2];
DataPtr dptr;
// dword 10
uint16_t qid;
uint16_t qsize;
// dword 11
uint16_t pc : 1;
uint16_t ien : 1;
uint16_t __reserved2 : 14;
uint16_t iv;
// dword 12-15
uint32_t __reserved4[4];
} __attribute__((packed));
static_assert(sizeof(CommandCreateCQ) == 15 * sizeof(uint32_t));
struct CommandCreateSQ
{
uint32_t __reserved0;
uint64_t __reserved1[2];
DataPtr dptr;
// dword 10
uint16_t qid;
uint16_t qsize;
// dword 11
uint16_t pc : 1;
uint16_t qprio : 2;
uint16_t __reserved2 : 13;
uint16_t cqid;
// dword 12
uint16_t nvmsetid;
uint16_t __reserved4;
// dword 13-15
uint32_t __reserved5[3];
} __attribute__((packed));
static_assert(sizeof(CommandCreateSQ) == 15 * sizeof(uint32_t));
struct CommandRead
{
uint32_t nsid;
uint64_t __reserved0;
uint64_t mptr;
DataPtr dptr;
// dword 10-11
uint64_t slba;
// dword 12
uint16_t nlb;
uint16_t __reserved1;
// dword 13-15
uint32_t __reserved2[3];
} __attribute__((packed));
static_assert(sizeof(CommandRead) == 15 * sizeof(uint32_t));
struct SubmissionQueueEntry
{
uint8_t opc;
uint8_t fuse : 2;
uint8_t __reserved : 4;
uint8_t psdt : 2;
uint16_t cid;
union
{
CommandGeneric generic;
CommandIdentify identify;
CommandCreateCQ create_cq;
CommandCreateSQ create_sq;
CommandRead read;
};
} __attribute__((packed));
static_assert(sizeof(SubmissionQueueEntry) == 64);
enum OPC : uint8_t
{
OPC_ADMIN_CREATE_SQ = 0x01,
OPC_ADMIN_CREATE_CQ = 0x05,
OPC_ADMIN_IDENTIFY = 0x06,
OPC_IO_WRITE = 0x01,
OPC_IO_READ = 0x02,
};
enum CNS : uint8_t
{
CNS_INDENTIFY_NAMESPACE = 0x00,
CNS_INDENTIFY_CONTROLLER = 0x01,
CNS_INDENTIFY_ACTIVE_NAMESPACES = 0x02,
};
struct NamespaceIdentify
{
uint64_t nsze;
uint64_t ncap;
uint64_t nuse;
uint8_t nsfeat;
uint8_t nlbaf;
uint8_t flbas;
uint8_t mc;
uint8_t dpc;
uint8_t dps;
uint8_t nmic;
uint8_t rescap;
uint8_t fpi;
uint8_t dlfeat;
uint16_t nawun;
uint16_t nawupf;
uint16_t nacwu;
uint16_t nabsn;
uint16_t nabo;
uint16_t nabspf;
uint16_t noiob;
uint64_t nvmcap[2];
uint16_t npwg;
uint16_t npwa;
uint16_t npdg;
uint16_t npda;
uint16_t nows;
uint16_t mssrl;
uint32_t mcl;
uint8_t msrc;
uint8_t __reserved0[11];
uint32_t adagrpid;
uint8_t __reserved1[3];
uint8_t nsattr;
uint16_t nvmsetid;
uint16_t endgid;
uint64_t nguid[2];
uint64_t eui64;
uint32_t lbafN[64];
uint8_t vendor_specific[3712];
} __attribute__((packed));
static_assert(sizeof(NamespaceIdentify) == 0x1000);
}
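Note on the register layout above: the queue doorbells are not part of the fixed ControllerRegisters block; they start at offset 0x1000 (SQ0TDBL) and are spaced according to CAP.DSTRD. A minimal sketch of the address math, assuming the definitions in this header (the helper itself is hypothetical and not part of the commit):

// Hypothetical helper -- doorbell address per the NVMe base spec:
// SQyTDBL is at 0x1000 + (2*y)     * (4 << CAP.DSTRD)
// CQyHDBL is at 0x1000 + (2*y + 1) * (4 << CAP.DSTRD)
static inline uintptr_t doorbell_address(uintptr_t bar0, uint32_t dstrd, uint16_t qid, bool completion)
{
	const uintptr_t stride = 4u << dstrd; // same as 1 << (2 + dstrd) used in Controller.cpp
	return bar0 + 0x1000 + (2u * qid + (completion ? 1u : 0u)) * stride;
}

The DoorbellRegisters pair (sq_tail immediately followed by cq_head) lines up with this layout only when DSTRD is 0 (4-byte stride), which is what QEMU's NVMe device typically reports.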


@@ -0,0 +1,41 @@
#pragma once
#include <kernel/Memory/DMARegion.h>
#include <kernel/Storage/StorageDevice.h>
namespace Kernel
{
class NVMeController;
class NVMeNamespace : public StorageDevice
{
public:
static BAN::ErrorOr<BAN::RefPtr<NVMeNamespace>> create(NVMeController&, uint32_t nsid, uint64_t block_count, uint32_t block_size);
virtual uint32_t sector_size() const override { return m_block_size; }
virtual uint64_t total_size() const override { return m_block_size * m_block_count; }
virtual dev_t rdev() const override { return m_rdev; }
virtual BAN::StringView name() const { return m_name; }
private:
NVMeNamespace(NVMeController&, uint32_t nsid, uint64_t block_count, uint32_t block_size);
BAN::ErrorOr<void> initialize();
virtual BAN::ErrorOr<void> read_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ByteSpan) override;
virtual BAN::ErrorOr<void> write_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ConstByteSpan) override;
private:
NVMeController& m_controller;
BAN::UniqPtr<DMARegion> m_dma_region;
const uint32_t m_nsid;
const uint32_t m_block_size;
const uint64_t m_block_count;
char m_name[10] {};
const dev_t m_rdev;
};
}


@@ -0,0 +1,37 @@
#pragma once
#include <BAN/UniqPtr.h>
#include <BAN/Vector.h>
#include <kernel/InterruptController.h>
#include <kernel/Memory/DMARegion.h>
#include <kernel/Semaphore.h>
#include <kernel/Storage/NVMe/Definitions.h>
namespace Kernel
{
class NVMeQueue : public Interruptable
{
public:
NVMeQueue(BAN::UniqPtr<Kernel::DMARegion>&& cq, BAN::UniqPtr<Kernel::DMARegion>&& sq, volatile NVMe::DoorbellRegisters& db, uint32_t qdepth, uint8_t irq);
uint16_t submit_command(NVMe::SubmissionQueueEntry& sqe);
virtual void handle_irq() final override;
private:
SpinLock m_lock;
BAN::UniqPtr<Kernel::DMARegion> m_completion_queue;
BAN::UniqPtr<Kernel::DMARegion> m_submission_queue;
volatile NVMe::DoorbellRegisters& m_doorbell;
const uint32_t m_qdepth;
uint32_t m_sq_tail { 0 };
uint32_t m_cq_head { 0 };
uint16_t m_cq_valid_phase { 1 };
Semaphore m_semaphore;
volatile uint16_t m_status;
volatile bool m_done { false };
};
}


@@ -6,6 +6,7 @@
#include <kernel/PCI.h>
#include <kernel/Storage/ATA/AHCI/Controller.h>
#include <kernel/Storage/ATA/ATAController.h>
#include <kernel/Storage/NVMe/Controller.h>
#define INVALID_VENDOR 0xFFFF
#define MULTI_FUNCTION 0x80
@@ -170,6 +171,14 @@ namespace Kernel::PCI
if (auto res = ATAController::create(pci_device); res.is_error())
dprintln("ATA: {}", res.error());
break;
case 0x08:
// FIXME: HACK if inode initialization fails before it attaches to DevFS,
// it will kernel panic. This is used to make nvme eternal
if (auto res = NVMeController::create(pci_device); res.is_error())
dprintln("NVMe: {}", res.error());
else
res.value()->ref();
break;
default:
dprintln("unsupported storage device (pci {2H}.{2H}.{2H})", pci_device.class_code(), pci_device.subclass(), pci_device.prog_if());
break;
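PCI class 0x01 (mass storage) with subclass 0x08 is the Non-Volatile Memory controller subclass, which is what the new case 0x08 above matches for NVMe devices.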


@@ -0,0 +1,310 @@
#include <BAN/Array.h>
#include <kernel/FS/DevFS/FileSystem.h>
#include <kernel/Memory/DMARegion.h>
#include <kernel/Storage/NVMe/Controller.h>
#include <kernel/Timer/Timer.h>
#include <sys/sysmacros.h>
#define DEBUG_NVMe 1
namespace Kernel
{
static dev_t get_ctrl_dev_major()
{
static dev_t major = DevFileSystem::get().get_next_dev();
return major;
}
static dev_t get_ctrl_dev_minor()
{
static dev_t minor = 0;
return minor++;
}
BAN::ErrorOr<BAN::RefPtr<StorageController>> NVMeController::create(PCI::Device& pci_device)
{
auto* controller_ptr = new NVMeController(pci_device);
if (controller_ptr == nullptr)
return BAN::Error::from_errno(ENOMEM);
auto controller = BAN::RefPtr<StorageController>::adopt(controller_ptr);
TRY(controller->initialize());
return controller;
}
NVMeController::NVMeController(PCI::Device& pci_device)
: CharacterDevice(0600, 0, 0)
, m_pci_device(pci_device)
, m_rdev(makedev(get_ctrl_dev_major(), get_ctrl_dev_minor()))
{
ASSERT(minor(m_rdev) < 10);
strcpy(m_name, "nvmeX");
m_name[4] = '0' + minor(m_rdev);
}
BAN::ErrorOr<void> NVMeController::initialize()
{
// See NVM express base specification section 3.5.1
m_pci_device.enable_bus_mastering();
m_pci_device.enable_memory_space();
m_bar0 = TRY(m_pci_device.allocate_bar_region(0));
if (m_bar0->type() != PCI::BarType::MEM)
{
dwarnln("NVMe controller BAR0 is not MEM");
return BAN::Error::from_errno(EINVAL);
}
if (m_bar0->size() < 0x1000)
{
dwarnln("NVMe controller BAR0 is too small {} bytes", m_bar0->size());
return BAN::Error::from_errno(EINVAL);
}
m_controller_registers = reinterpret_cast<volatile NVMe::ControllerRegisters*>(m_bar0->vaddr());
const auto& vs = m_controller_registers->vs;
if (vs.major != 1)
{
dwarnln("NVMe controller has unsupported version {}.{}", (uint16_t)vs.major, (uint8_t)vs.minor);
return BAN::Error::from_errno(ENOTSUP);
}
dprintln_if(DEBUG_NVMe, "NVMe controller");
dprintln_if(DEBUG_NVMe, " version: {}.{}", (uint16_t)vs.major, (uint8_t)vs.minor);
auto& cap = m_controller_registers->cap;
if (!(cap.css & NVMe::CAP_CSS_NVME))
{
dwarnln("NVMe controller does not support NVMe command set");
return BAN::Error::from_errno(ECANCELED);
}
const uint64_t min_page_size = 1ull << (12 + cap.mpsmin);
const uint64_t max_page_size = 1ull << (12 + cap.mpsmax);
if (PAGE_SIZE < min_page_size || PAGE_SIZE > max_page_size)
{
dwarnln("NVMe controller does not support {} byte pages, only {}-{} byte pages are supported", PAGE_SIZE, min_page_size, max_page_size);
return BAN::Error::from_errno(ECANCELED);
}
// One for aq and one for ioq
TRY(m_pci_device.reserve_irqs(2));
auto& cc = m_controller_registers->cc;
if (cc.en)
TRY(wait_until_ready(true));
cc.en = 0;
TRY(wait_until_ready(false));
dprintln_if(DEBUG_NVMe, " controller reset");
TRY(create_admin_queue());
dprintln_if(DEBUG_NVMe, " created admin queue");
// Configure controller
cc.ams = 0;
cc.mps = PAGE_SIZE_SHIFT - 12;
cc.css = 0b000;
cc.en = 1;
TRY(wait_until_ready(true));
dprintln_if(DEBUG_NVMe, " controller enabled");
TRY(identify_controller());
cc.iocqes = 4; static_assert(1 << 4 == sizeof(NVMe::CompletionQueueEntry));
cc.iosqes = 6; static_assert(1 << 6 == sizeof(NVMe::SubmissionQueueEntry));
TRY(create_io_queue());
dprintln_if(DEBUG_NVMe, " created io queue");
TRY(identify_namespaces());
DevFileSystem::get().add_device(this);
return {};
}
BAN::ErrorOr<void> NVMeController::wait_until_ready(bool expected_value)
{
const auto& cap = m_controller_registers->cap;
const auto& csts = m_controller_registers->csts;
uint64_t timeout = SystemTimer::get().ms_since_boot() + 500 * cap.to;
while (csts.rdy != expected_value)
{
if (SystemTimer::get().ms_since_boot() >= timeout)
{
dwarnln("NVMe controller reset timedout");
return BAN::Error::from_errno(ETIMEDOUT);
}
}
return {};
}
BAN::ErrorOr<void> NVMeController::identify_controller()
{
auto dma_page = TRY(DMARegion::create(PAGE_SIZE));
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_ADMIN_IDENTIFY;
sqe.identify.dptr.prp1 = dma_page->paddr();
sqe.identify.cns = NVMe::CNS_INDENTIFY_CONTROLLER;
if (uint16_t status = m_admin_queue->submit_command(sqe))
{
dwarnln("NVMe controller identify failed (status {4H})", status);
return BAN::Error::from_errno(EFAULT);
}
if (*reinterpret_cast<uint16_t*>(dma_page->vaddr()) != m_pci_device.vendor_id())
{
dwarnln("NVMe controller vendor id does not match with the one in PCI");
return BAN::Error::from_errno(EFAULT);
}
dprintln_if(DEBUG_NVMe, " model: '{}'", BAN::StringView { (char*)dma_page->vaddr() + 24, 20 });
return {};
}
BAN::ErrorOr<void> NVMeController::identify_namespaces()
{
auto dma_page = TRY(DMARegion::create(PAGE_SIZE));
BAN::Vector<uint32_t> namespace_ids;
TRY(namespace_ids.resize(PAGE_SIZE / sizeof(uint32_t)));
{
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_ADMIN_IDENTIFY;
sqe.identify.dptr.prp1 = dma_page->paddr();
sqe.identify.cns = NVMe::CNS_INDENTIFY_ACTIVE_NAMESPACES;
if (uint16_t status = m_admin_queue->submit_command(sqe))
{
dwarnln("NVMe active namespace identify failed (status {4H})", status);
return BAN::Error::from_errno(EFAULT);
}
memcpy(namespace_ids.data(), reinterpret_cast<void*>(dma_page->vaddr()), PAGE_SIZE);
}
for (uint32_t nsid : namespace_ids)
{
if (nsid == 0)
break;
dprintln(" found namespace {}", nsid);
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_ADMIN_IDENTIFY;
sqe.identify.nsid = nsid;
sqe.identify.dptr.prp1 = dma_page->paddr();
sqe.identify.cns = NVMe::CNS_INDENTIFY_NAMESPACE;
if (uint16_t status = m_admin_queue->submit_command(sqe))
{
dwarnln("NVMe namespace {} identify failed (status {4H})", nsid , status);
return BAN::Error::from_errno(EFAULT);
}
auto& namespace_info = *reinterpret_cast<volatile NVMe::NamespaceIdentify*>(dma_page->vaddr());
const uint64_t block_count = namespace_info.nsze;
const uint64_t format = namespace_info.lbafN[namespace_info.flbas & 0x0F];
const uint64_t block_size = 1u << ((format >> 16) & 0xFF);
dprintln(" block count {}", block_count);
dprintln(" block size {} B", block_size);
dprintln(" total {} MiB", block_count * block_size / (1 << 20));
auto ns = TRY(NVMeNamespace::create(*this, nsid, block_count, block_size));
TRY(m_namespaces.push_back(BAN::move(ns)));
}
return {};
}
BAN::ErrorOr<void> NVMeController::create_admin_queue()
{
const uint32_t admin_queue_depth = BAN::Math::min(PAGE_SIZE / sizeof(NVMe::CompletionQueueEntry), PAGE_SIZE / sizeof(NVMe::SubmissionQueueEntry));
auto& aqa = m_controller_registers->aqa;
aqa.acqs = admin_queue_depth - 1;
aqa.asqs = admin_queue_depth - 1;
dprintln_if(DEBUG_NVMe, " admin queue depth is {}", admin_queue_depth);
const uint32_t completion_queue_size = admin_queue_depth * sizeof(NVMe::CompletionQueueEntry);
auto completion_queue = TRY(DMARegion::create(completion_queue_size));
memset((void*)completion_queue->vaddr(), 0x00, completion_queue->size());
const uint32_t submission_queue_size = admin_queue_depth * sizeof(NVMe::SubmissionQueueEntry);
auto submission_queue = TRY(DMARegion::create(submission_queue_size));
memset((void*)submission_queue->vaddr(), 0x00, submission_queue->size());
m_controller_registers->acq = completion_queue->paddr();
m_controller_registers->asq = submission_queue->paddr();
uint8_t irq = m_pci_device.get_irq(0);
dprintln_if(DEBUG_NVMe, " admin queue using irq {}", irq);
auto& doorbell = *reinterpret_cast<volatile NVMe::DoorbellRegisters*>(m_bar0->vaddr() + NVMe::ControllerRegisters::SQ0TDBL);
m_admin_queue = TRY(BAN::UniqPtr<NVMeQueue>::create(BAN::move(completion_queue), BAN::move(submission_queue), doorbell, admin_queue_depth, irq));
return {};
}
BAN::ErrorOr<void> NVMeController::create_io_queue()
{
constexpr uint32_t queue_size = PAGE_SIZE;
constexpr uint32_t queue_elems = queue_size / BAN::Math::max(sizeof(NVMe::CompletionQueueEntry), sizeof(NVMe::SubmissionQueueEntry));
auto completion_queue = TRY(DMARegion::create(queue_size));
memset((void*)completion_queue->vaddr(), 0x00, completion_queue->size());
auto submission_queue = TRY(DMARegion::create(queue_size));
memset((void*)submission_queue->vaddr(), 0x00, submission_queue->size());
{
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_ADMIN_CREATE_CQ;
sqe.create_cq.dptr.prp1 = completion_queue->paddr();
sqe.create_cq.qsize = queue_elems - 1;
sqe.create_cq.qid = 1;
sqe.create_cq.iv = 1;
sqe.create_cq.ien = 1;
sqe.create_cq.pc = 1;
if (uint16_t status = m_admin_queue->submit_command(sqe))
{
dwarnln("NVMe io completion queue creation failed (status {4H})", status);
return BAN::Error::from_errno(EFAULT);
}
}
{
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_ADMIN_CREATE_SQ;
sqe.create_sq.dptr.prp1 = submission_queue->paddr();
sqe.create_sq.qsize = queue_elems - 1;
sqe.create_sq.qid = 1;
sqe.create_sq.cqid = 1;
sqe.create_sq.qprio = 0;
sqe.create_sq.pc = 1;
sqe.create_sq.nvmsetid = 0;
if (uint16_t status = m_admin_queue->submit_command(sqe))
{
dwarnln("NVMe io submission queue creation failed (status {4H})", status);
return BAN::Error::from_errno(EFAULT);
}
}
uint8_t irq = m_pci_device.get_irq(1);
dprintln_if(DEBUG_NVMe, " io queue using irq {}", irq);
const uint32_t doorbell_stride = 1 << (2 + m_controller_registers->cap.dstrd);
const uint32_t doorbell_offset = 2 * doorbell_stride;
auto& doorbell = *reinterpret_cast<volatile NVMe::DoorbellRegisters*>(m_bar0->vaddr() + NVMe::ControllerRegisters::SQ0TDBL + doorbell_offset);
m_io_queue = TRY(BAN::UniqPtr<NVMeQueue>::create(BAN::move(completion_queue), BAN::move(submission_queue), doorbell, queue_elems, irq));
return {};
}
}
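For reference, the LBA-format decode in identify_namespaces() follows the layout of each lbafN dword: metadata size in bits 15:0, LBADS (log2 of the data block size) in bits 23:16, relative performance in bits 25:24. A small sketch of the same decode, assuming the NamespaceIdentify struct from this commit:

// Sketch only: mirrors the shift/mask done in identify_namespaces().
static uint32_t block_size_from_identify(const volatile Kernel::NVMe::NamespaceIdentify& info)
{
	const uint32_t format = info.lbafN[info.flbas & 0x0F]; // FLBAS bits 3:0 select the active format
	const uint8_t lbads = (format >> 16) & 0xFF;           // log2 of the LBA data size
	return 1u << lbads;
}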


@@ -0,0 +1,119 @@
#include <kernel/FS/DevFS/FileSystem.h>
#include <kernel/Storage/NVMe/Controller.h>
#include <kernel/Storage/NVMe/Namespace.h>
#include <sys/sysmacros.h>
namespace Kernel
{
static dev_t get_ns_dev_major()
{
static dev_t major = DevFileSystem::get().get_next_dev();
return major;
}
static dev_t get_ns_dev_minor()
{
static dev_t minor = 0;
return minor++;
}
BAN::ErrorOr<BAN::RefPtr<NVMeNamespace>> NVMeNamespace::create(NVMeController& controller, uint32_t nsid, uint64_t block_count, uint32_t block_size)
{
auto* namespace_ptr = new NVMeNamespace(controller, nsid, block_count, block_size);
if (namespace_ptr == nullptr)
return BAN::Error::from_errno(ENOMEM);
auto ns = BAN::RefPtr<NVMeNamespace>::adopt(namespace_ptr);
TRY(ns->initialize());
return ns;
}
NVMeNamespace::NVMeNamespace(NVMeController& controller, uint32_t nsid, uint64_t block_count, uint32_t block_size)
: m_controller(controller)
, m_nsid(nsid)
, m_block_size(block_size)
, m_block_count(block_count)
, m_rdev(makedev(get_ns_dev_major(), get_ns_dev_minor()))
{
ASSERT(minor(m_rdev) < 10);
ASSERT(m_controller.name().size() + 2 < sizeof(m_name));
memcpy(m_name, m_controller.name().data(), m_controller.name().size());
m_name[m_controller.name().size() + 0] = 'n';
m_name[m_controller.name().size() + 1] = '1' + minor(m_rdev);
m_name[m_controller.name().size() + 2] = '\0';
}
BAN::ErrorOr<void> NVMeNamespace::initialize()
{
m_dma_region = TRY(DMARegion::create(PAGE_SIZE));
add_disk_cache();
DevFileSystem::get().add_device(this);
char name_prefix[20];
strcpy(name_prefix, m_name);
strcat(name_prefix, "p");
if (auto res = initialize_partitions(name_prefix); res.is_error())
dprintln("{}", res.error());
return {};
}
BAN::ErrorOr<void> NVMeNamespace::read_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ByteSpan buffer)
{
ASSERT(buffer.size() >= sector_count * m_block_size);
for (uint64_t i = 0; i < sector_count;)
{
uint16_t count = BAN::Math::min(sector_count - i, m_dma_region->size() / m_block_size);
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_IO_READ;
sqe.read.nsid = m_nsid;
sqe.read.dptr.prp1 = m_dma_region->paddr();
sqe.read.slba = lba + i;
sqe.read.nlb = count - 1;
if (uint16_t status = m_controller.io_queue().submit_command(sqe))
{
dwarnln("NVMe read failed (status {4H})", status);
return BAN::Error::from_errno(EIO);
}
memcpy(buffer.data() + i * m_block_size, reinterpret_cast<void*>(m_dma_region->vaddr()), count * m_block_size);
i += count;
}
return {};
}
BAN::ErrorOr<void> NVMeNamespace::write_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ConstByteSpan buffer)
{
ASSERT(buffer.size() >= sector_count * m_block_size);
for (uint64_t i = 0; i < sector_count;)
{
uint16_t count = BAN::Math::min(sector_count - i, m_dma_region->size() / m_block_size);
memcpy(reinterpret_cast<void*>(m_dma_region->vaddr()), buffer.data() + i * m_block_size, count * m_block_size);
NVMe::SubmissionQueueEntry sqe {};
sqe.opc = NVMe::OPC_IO_WRITE;
sqe.read.nsid = m_nsid;
sqe.read.dptr.prp1 = m_dma_region->paddr();
sqe.read.slba = lba + i;
sqe.read.nlb = count - 1;
if (uint16_t status = m_controller.io_queue().submit_command(sqe))
{
dwarnln("NVMe write failed (status {4H})", status);
return BAN::Error::from_errno(EIO);
}
i += count;
}
return {};
}
}
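All reads and writes above bounce through a single page-sized DMA region, so each command transfers at most PAGE_SIZE / block_size sectors and only PRP1 is ever used; the loops split larger requests into multiple commands.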


@@ -0,0 +1,82 @@
#include <kernel/LockGuard.h>
#include <kernel/Scheduler.h>
#include <kernel/Storage/NVMe/Queue.h>
#include <kernel/Timer/Timer.h>
namespace Kernel
{
static constexpr uint64_t s_nvme_command_timeout_ms = 1000;
static constexpr uint64_t s_nvme_command_poll_timeout_ms = 20;
NVMeQueue::NVMeQueue(BAN::UniqPtr<Kernel::DMARegion>&& cq, BAN::UniqPtr<Kernel::DMARegion>&& sq, volatile NVMe::DoorbellRegisters& db, uint32_t qdepth, uint8_t irq)
: m_completion_queue(BAN::move(cq))
, m_submission_queue(BAN::move(sq))
, m_doorbell(db)
, m_qdepth(qdepth)
{
set_irq(irq);
enable_interrupt();
}
void NVMeQueue::handle_irq()
{
auto* cq_ptr = reinterpret_cast<NVMe::CompletionQueueEntry*>(m_completion_queue->vaddr());
while ((cq_ptr[m_cq_head].sts & 1) == m_cq_valid_phase)
{
uint16_t sts = cq_ptr[m_cq_head].sts >> 1;
uint16_t cid = cq_ptr[m_cq_head].cid;
ASSERT(cid == 0);
ASSERT(!m_done);
m_status = sts;
m_done = true;
m_semaphore.unblock();
m_cq_head = (m_cq_head + 1) % m_qdepth;
if (m_cq_head == 0)
m_cq_valid_phase ^= 1;
}
m_doorbell.cq_head = m_cq_head;
}
uint16_t NVMeQueue::submit_command(NVMe::SubmissionQueueEntry& sqe)
{
LockGuard _(m_lock);
ASSERT(m_done == false);
m_status = 0;
sqe.cid = 0;
auto* sqe_ptr = reinterpret_cast<NVMe::SubmissionQueueEntry*>(m_submission_queue->vaddr());
memcpy(&sqe_ptr[m_sq_tail], &sqe, sizeof(NVMe::SubmissionQueueEntry));
m_sq_tail = (m_sq_tail + 1) % m_qdepth;
m_doorbell.sq_tail = m_sq_tail;
const uint64_t start_time = SystemTimer::get().ms_since_boot();
while (SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_poll_timeout_ms)
{
if (!m_done)
continue;
m_done = false;
return m_status;
}
while (SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_timeout_ms)
{
if (!m_done)
{
m_semaphore.block();
continue;
}
m_done = false;
return m_status;
}
return 0xFFFF;
}
}
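As written, the queue allows one command in flight at a time: submit_command() holds m_lock for the whole operation and always uses CID 0 (matching the ASSERT in handle_irq()), spinning for up to 20 ms before falling back to blocking on the semaphore until the 1000 ms timeout. The completion-phase logic works because the controller inverts bit 0 of the status field each time it wraps around the completion queue, so an entry is fresh exactly when its phase bit equals m_cq_valid_phase.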


@@ -13,11 +13,16 @@ if (($BANAN_UEFI_BOOT)); then
BIOS_ARGS="-bios $OVMF_PATH -net none"
fi
if [[ $BANAN_DISK_TYPE == "NVME" ]]; then
DISK_ARGS="-device nvme,serial=deadbeef,drive=disk"
else
DISK_ARGS="-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0"
fi
qemu-system-$BANAN_ARCH \
-m 128 \
-smp 2 \
$BIOS_ARGS \
-drive format=raw,id=disk,file=${BANAN_DISK_IMAGE_PATH},if=none \
$DISK_ARGS \
$@ \
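With this change, exporting BANAN_DISK_TYPE=NVME before invoking the run script attaches the disk image as a QEMU NVMe drive (-device nvme,serial=deadbeef); any other value keeps the previous AHCI/IDE setup.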