Kernel: Implement NVMe driver
I'm actually able to boot this OS fine on my own laptop now!
This commit is contained in:
parent
812e9efd41
commit
54a92293da
|
@ -65,6 +65,9 @@ set(KERNEL_SOURCES
|
|||
kernel/Storage/ATA/ATAController.cpp
|
||||
kernel/Storage/ATA/ATADevice.cpp
|
||||
kernel/Storage/DiskCache.cpp
|
||||
kernel/Storage/NVMe/Controller.cpp
|
||||
kernel/Storage/NVMe/Namespace.cpp
|
||||
kernel/Storage/NVMe/Queue.cpp
|
||||
kernel/Storage/Partition.cpp
|
||||
kernel/Storage/StorageDevice.cpp
|
||||
kernel/Syscall.cpp
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#endif
|
||||
|
||||
#define PAGE_SIZE ((uintptr_t)4096)
|
||||
#define PAGE_SIZE_SHIFT 12
|
||||
#define PAGE_ADDR_MASK (~(uintptr_t)0xFFF)
|
||||
|
||||
namespace Kernel
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
#pragma once
|
||||
|
||||
#include <BAN/Vector.h>
|
||||
#include <kernel/InterruptController.h>
|
||||
#include <kernel/PCI.h>
|
||||
#include <kernel/Storage/NVMe/Definitions.h>
|
||||
#include <kernel/Storage/NVMe/Namespace.h>
|
||||
#include <kernel/Storage/NVMe/Queue.h>
|
||||
|
||||
namespace Kernel
|
||||
{
|
||||
|
||||
class NVMeController final : public StorageController, public CharacterDevice
|
||||
{
|
||||
BAN_NON_COPYABLE(NVMeController);
|
||||
BAN_NON_MOVABLE(NVMeController);
|
||||
|
||||
public:
|
||||
static BAN::ErrorOr<BAN::RefPtr<StorageController>> create(PCI::Device&);
|
||||
~NVMeController() { ASSERT_NOT_REACHED(); }
|
||||
|
||||
NVMeQueue& io_queue() { return *m_io_queue; }
|
||||
|
||||
virtual dev_t rdev() const override { return m_rdev; }
|
||||
virtual BAN::StringView name() const override { return m_name; }
|
||||
|
||||
private:
|
||||
NVMeController(PCI::Device& pci_device);
|
||||
virtual BAN::ErrorOr<void> initialize() override;
|
||||
|
||||
BAN::ErrorOr<void> identify_controller();
|
||||
BAN::ErrorOr<void> identify_namespaces();
|
||||
|
||||
BAN::ErrorOr<void> wait_until_ready(bool expected_value);
|
||||
BAN::ErrorOr<void> create_admin_queue();
|
||||
BAN::ErrorOr<void> create_io_queue();
|
||||
|
||||
private:
|
||||
PCI::Device& m_pci_device;
|
||||
BAN::UniqPtr<PCI::BarRegion> m_bar0;
|
||||
volatile NVMe::ControllerRegisters* m_controller_registers;
|
||||
|
||||
BAN::UniqPtr<NVMeQueue> m_admin_queue;
|
||||
BAN::UniqPtr<NVMeQueue> m_io_queue;
|
||||
|
||||
BAN::Vector<BAN::RefPtr<NVMeNamespace>> m_namespaces;
|
||||
|
||||
char m_name[20];
|
||||
const dev_t m_rdev;
|
||||
};
|
||||
|
||||
}
|
|
@ -0,0 +1,295 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Kernel::NVMe
|
||||
{
|
||||
|
||||
// Controller capabilities register, the first 64 bits of BAR0.
// Bit positions in the comments follow from the declared field widths.
struct CAP
{
	uint64_t mqes        : 16; // bits 15:0
	uint64_t cqr         :  1; // bit  16
	uint64_t ams         :  2; // bits 18:17
	uint64_t __reserved0 :  5; // bits 23:19
	uint64_t to          :  8; // bits 31:24, ready timeout in 500 ms units
	uint64_t dstrd       :  4; // bits 35:32, doorbell stride is 1 << (2 + dstrd) bytes
	uint64_t nssrs       :  1; // bit  36
	uint64_t css         :  8; // bits 44:37, tested against CAP_CSS flags
	uint64_t bps         :  1; // bit  45
	uint64_t cps         :  2; // bits 47:46
	uint64_t mpsmin      :  4; // bits 51:48, min page size is 1 << (12 + mpsmin)
	uint64_t mpsmax      :  4; // bits 55:52, max page size is 1 << (12 + mpsmax)
	uint64_t pmrs        :  1; // bit  56
	uint64_t cmpbs       :  1; // bit  57
	uint64_t nsss        :  1; // bit  58
	uint64_t crms        :  2; // bits 60:59
	uint64_t __reserved1 :  3; // bits 63:61
};
static_assert(sizeof(CAP) == 8, "CAP must be exactly 64 bits");
|
||||
|
||||
// Bit flags for the CAP::css field.
enum CAP_CSS
{
	CAP_CSS_NVME  = 0x01, // bit 0
	CAP_CSS_IO    = 0x40, // bit 6
	CAP_CSS_ADMIN = 0x80, // bit 7
};
|
||||
|
||||
// Version register (32 bits): major.minor.tertiary.
struct VS
{
	uint32_t tertiary :  8; // bits 7:0
	uint32_t minor    :  8; // bits 15:8
	uint32_t major    : 16; // bits 31:16
};
static_assert(sizeof(VS) == 4, "VS must be exactly 32 bits");
|
||||
|
||||
// Controller configuration register (32 bits).
// Bit positions in the comments follow from the declared field widths.
struct CC
{
	uint32_t en          : 1; // bit  0, controller enable
	uint32_t __reserved0 : 3; // bits 3:1
	uint32_t css         : 3; // bits 6:4
	uint32_t mps         : 4; // bits 10:7, written as PAGE_SIZE_SHIFT - 12
	uint32_t ams         : 3; // bits 13:11
	uint32_t shn         : 2; // bits 15:14
	uint32_t iosqes      : 4; // bits 19:16, log2 of the submission entry size
	uint32_t iocqes      : 4; // bits 23:20, log2 of the completion entry size
	uint32_t crime       : 1; // bit  24
	uint32_t __reserved1 : 7; // bits 31:25
};
static_assert(sizeof(CC) == 4, "CC must be exactly 32 bits");
|
||||
|
||||
// Controller status register (32 bits).
struct CSTS
{
	uint32_t rdy        :  1; // bit  0, polled by wait_until_ready()
	uint32_t cfs        :  1; // bit  1
	uint32_t shts       :  2; // bits 3:2
	uint32_t nssro      :  1; // bit  4
	uint32_t pp         :  1; // bit  5
	uint32_t st         :  1; // bit  6
	uint32_t __reserved : 25; // bits 31:7
};
static_assert(sizeof(CSTS) == 4, "CSTS must be exactly 32 bits");
|
||||
|
||||
// Admin queue attributes register (32 bits). Both sizes are written as
// "queue depth - 1" by create_admin_queue().
struct AQA
{
	uint32_t asqs        : 12; // bits 11:0, admin submission queue size
	uint32_t __reserved0 :  4; // bits 15:12
	uint32_t acqs        : 12; // bits 27:16, admin completion queue size
	uint32_t __reserved1 :  4; // bits 31:28
};
static_assert(sizeof(AQA) == 4, "AQA must be exactly 32 bits");
|
||||
|
||||
// BAR0
|
||||
struct ControllerRegisters
|
||||
{
|
||||
CAP cap;
|
||||
VS vs;
|
||||
uint32_t intms;
|
||||
uint32_t intmc;
|
||||
CC cc;
|
||||
uint8_t __reserved0[4];
|
||||
CSTS csts;
|
||||
uint32_t nssr;
|
||||
AQA aqa;
|
||||
uint64_t asq;
|
||||
uint64_t acq;
|
||||
|
||||
static constexpr uint32_t SQ0TDBL = 0x1000;
|
||||
};
|
||||
static_assert(sizeof(ControllerRegisters) == 0x38);
|
||||
|
||||
// Doorbell pair of one queue: submission tail followed by completion head.
//
// cq_head sits 4 bytes after sq_tail. That matches the doorbell stride
// computed in NVMeController::create_io_queue(), 1 << (2 + CAP.DSTRD), only
// when CAP.DSTRD is 0.
struct DoorbellRegisters
{
	uint32_t sq_tail;
	uint32_t cq_head;
} __attribute__((packed));
static_assert(sizeof(DoorbellRegisters) == 8, "doorbell pair must be two consecutive 32 bit registers");
|
||||
|
||||
// One 16 byte completion queue entry. Only the last dword is interpreted by
// the driver.
struct CompletionQueueEntry
{
	uint32_t dontcare[3]; // dwords 0-2, unused by the driver
	uint16_t cid;         // command identifier of the completed command
	uint16_t sts;         // bit 0 is the phase tag, the bits above it are the status
} __attribute__((packed));
static_assert(sizeof(CompletionQueueEntry) == 16, "completion queue entry must be 16 bytes");
|
||||
|
||||
// 16 byte data pointer of a command, viewed either as two PRP entries or as
// one raw SGL descriptor. The driver only ever fills prp1.
struct DataPtr
{
	union
	{
		struct
		{
			uint64_t prp1;
			uint64_t prp2;
		};
		uint8_t sgl1[16];
	};
};
// Every command struct embeds DataPtr and asserts its own total size, so pin
// this one as well to get a precise error if it ever changes.
static_assert(sizeof(DataPtr) == 16, "data pointer must be 16 bytes");
|
||||
|
||||
struct CommandGeneric
|
||||
{
|
||||
uint32_t nsid;
|
||||
uint32_t cdw2;
|
||||
uint32_t cdw3;
|
||||
uint64_t mptr;
|
||||
DataPtr dptr;
|
||||
uint32_t cdw10;
|
||||
uint32_t cdw11;
|
||||
uint32_t cdw12;
|
||||
uint32_t cdw13;
|
||||
uint32_t cdw14;
|
||||
uint32_t cdw15;
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(CommandGeneric) == 15 * sizeof(uint32_t));
|
||||
|
||||
struct CommandIdentify
|
||||
{
|
||||
uint32_t nsid;
|
||||
uint64_t __reserved0[2];
|
||||
DataPtr dptr;
|
||||
// dword 10
|
||||
uint8_t cns;
|
||||
uint8_t __reserved1;
|
||||
uint16_t cntid;
|
||||
// dword 11
|
||||
uint16_t cnsid;
|
||||
uint8_t __reserved2;
|
||||
uint8_t csi;
|
||||
// dword 12-15
|
||||
uint32_t __reserved3[4];
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(CommandIdentify) == 15 * sizeof(uint32_t));
|
||||
|
||||
struct CommandCreateCQ
|
||||
{
|
||||
uint32_t __reserved0;
|
||||
uint64_t __reserved1[2];
|
||||
DataPtr dptr;
|
||||
// dword 10
|
||||
uint16_t qid;
|
||||
uint16_t qsize;
|
||||
// dword 11
|
||||
uint16_t pc : 1;
|
||||
uint16_t ien : 1;
|
||||
uint16_t __reserved2 : 14;
|
||||
uint16_t iv;
|
||||
// dword 12-15
|
||||
uint32_t __reserved4[4];
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(CommandCreateCQ) == 15 * sizeof(uint32_t));
|
||||
|
||||
struct CommandCreateSQ
|
||||
{
|
||||
uint32_t __reserved0;
|
||||
uint64_t __reserved1[2];
|
||||
DataPtr dptr;
|
||||
// dword 10
|
||||
uint16_t qid;
|
||||
uint16_t qsize;
|
||||
// dword 11
|
||||
uint16_t pc : 1;
|
||||
uint16_t qprio : 2;
|
||||
uint16_t __reserved2 : 13;
|
||||
uint16_t cqid;
|
||||
// dword 12
|
||||
uint16_t nvmsetid;
|
||||
uint16_t __reserved4;
|
||||
// dword 13-15
|
||||
uint32_t __reserved5[3];
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(CommandCreateSQ) == 15 * sizeof(uint32_t));
|
||||
|
||||
|
||||
struct CommandRead
|
||||
{
|
||||
uint32_t nsid;
|
||||
uint64_t __reserved0;
|
||||
uint64_t mptr;
|
||||
DataPtr dptr;
|
||||
// dword 10-11
|
||||
uint64_t slba;
|
||||
// dword 12
|
||||
uint16_t nlb;
|
||||
uint16_t __reserved1;
|
||||
// dword 13-15
|
||||
uint32_t __reserved2[3];
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(CommandRead) == 15 * sizeof(uint32_t));
|
||||
|
||||
struct SubmissionQueueEntry
|
||||
{
|
||||
uint8_t opc;
|
||||
uint8_t fuse : 2;
|
||||
uint8_t __reserved : 4;
|
||||
uint8_t psdt : 2;
|
||||
uint16_t cid;
|
||||
union
|
||||
{
|
||||
CommandGeneric generic;
|
||||
CommandIdentify identify;
|
||||
CommandCreateCQ create_cq;
|
||||
CommandCreateSQ create_sq;
|
||||
CommandRead read;
|
||||
};
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(SubmissionQueueEntry) == 64);
|
||||
|
||||
// Command opcodes written to SubmissionQueueEntry::opc. Admin and I/O opcodes
// are separate number spaces, so values may repeat between the two groups.
enum OPC : uint8_t
{
	// admin queue
	OPC_ADMIN_CREATE_SQ = 0x01,
	OPC_ADMIN_CREATE_CQ = 0x05,
	OPC_ADMIN_IDENTIFY  = 0x06,

	// I/O queue
	OPC_IO_WRITE        = 0x01,
	OPC_IO_READ         = 0x02,
};
|
||||
|
||||
// Values for CommandIdentify::cns, selecting what IDENTIFY returns.
enum CNS : uint8_t
{
	CNS_IDENTIFY_NAMESPACE         = 0x00,
	CNS_IDENTIFY_CONTROLLER        = 0x01,
	CNS_IDENTIFY_ACTIVE_NAMESPACES = 0x02,

	// Original, misspelled names. Kept so existing users keep compiling.
	CNS_INDENTIFY_NAMESPACE         = CNS_IDENTIFY_NAMESPACE,
	CNS_INDENTIFY_CONTROLLER        = CNS_IDENTIFY_CONTROLLER,
	CNS_INDENTIFY_ACTIVE_NAMESPACES = CNS_IDENTIFY_ACTIVE_NAMESPACES,
};
|
||||
|
||||
// One page result of IDENTIFY with CNS_INDENTIFY_NAMESPACE. The driver reads
// only nsze, flbas and lbafN; the other fields exist to keep the layout exact.
struct NamespaceIdentify
{
	uint64_t nsze;   // byte 0, used as the namespace block count
	uint64_t ncap;
	uint64_t nuse;
	uint8_t  nsfeat; // byte 24
	uint8_t  nlbaf;
	uint8_t  flbas;  // byte 26, low 4 bits index lbafN
	uint8_t  mc;
	uint8_t  dpc;
	uint8_t  dps;
	uint8_t  nmic;
	uint8_t  rescap;
	uint8_t  fpi;
	uint8_t  dlfeat;
	uint16_t nawun;  // byte 34
	uint16_t nawupf;
	uint16_t nacwu;
	uint16_t nabsn;
	uint16_t nabo;
	uint16_t nabspf;
	uint16_t noiob;
	uint64_t nvmcap[2]; // byte 48
	uint16_t npwg;      // byte 64
	uint16_t npwa;
	uint16_t npdg;
	uint16_t npda;
	uint16_t nows;
	uint16_t mssrl;
	uint32_t mcl;
	uint8_t  msrc;      // byte 80
	uint8_t  __reserved0[11];
	uint32_t adagrpid;  // byte 92
	uint8_t  __reserved1[3];
	uint8_t  nsattr;
	uint16_t nvmsetid;
	uint16_t endgid;
	uint64_t nguid[2];  // byte 104
	uint64_t eui64;     // byte 120
	uint32_t lbafN[64]; // byte 128, bits 23:16 of an entry are log2 of the block size
	uint8_t  vendor_specific[3712];
} __attribute__((packed));
static_assert(sizeof(NamespaceIdentify) == 0x1000, "identify namespace data must fill one 4 KiB page");
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
#pragma once
|
||||
|
||||
#include <kernel/Memory/DMARegion.h>
|
||||
#include <kernel/Storage/StorageDevice.h>
|
||||
|
||||
namespace Kernel
|
||||
{
|
||||
|
||||
class NVMeController;
|
||||
|
||||
class NVMeNamespace : public StorageDevice
|
||||
{
|
||||
public:
|
||||
static BAN::ErrorOr<BAN::RefPtr<NVMeNamespace>> create(NVMeController&, uint32_t nsid, uint64_t block_count, uint32_t block_size);
|
||||
|
||||
virtual uint32_t sector_size() const override { return m_block_size; }
|
||||
virtual uint64_t total_size() const override { return m_block_size * m_block_count; }
|
||||
|
||||
virtual dev_t rdev() const override { return m_rdev; }
|
||||
virtual BAN::StringView name() const { return m_name; }
|
||||
|
||||
private:
|
||||
NVMeNamespace(NVMeController&, uint32_t nsid, uint64_t block_count, uint32_t block_size);
|
||||
BAN::ErrorOr<void> initialize();
|
||||
|
||||
virtual BAN::ErrorOr<void> read_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ByteSpan) override;
|
||||
virtual BAN::ErrorOr<void> write_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ConstByteSpan) override;
|
||||
|
||||
private:
|
||||
NVMeController& m_controller;
|
||||
BAN::UniqPtr<DMARegion> m_dma_region;
|
||||
|
||||
const uint32_t m_nsid;
|
||||
const uint32_t m_block_size;
|
||||
const uint64_t m_block_count;
|
||||
|
||||
char m_name[10] {};
|
||||
const dev_t m_rdev;
|
||||
};
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
|
||||
#include <BAN/UniqPtr.h>
|
||||
#include <BAN/Vector.h>
|
||||
#include <kernel/InterruptController.h>
|
||||
#include <kernel/Memory/DMARegion.h>
|
||||
#include <kernel/Semaphore.h>
|
||||
#include <kernel/Storage/NVMe/Definitions.h>
|
||||
|
||||
namespace Kernel
|
||||
{
|
||||
|
||||
class NVMeQueue : public Interruptable
|
||||
{
|
||||
public:
|
||||
NVMeQueue(BAN::UniqPtr<Kernel::DMARegion>&& cq, BAN::UniqPtr<Kernel::DMARegion>&& sq, volatile NVMe::DoorbellRegisters& db, uint32_t qdepth, uint8_t irq);
|
||||
|
||||
uint16_t submit_command(NVMe::SubmissionQueueEntry& sqe);
|
||||
|
||||
virtual void handle_irq() final override;
|
||||
|
||||
private:
|
||||
SpinLock m_lock;
|
||||
BAN::UniqPtr<Kernel::DMARegion> m_completion_queue;
|
||||
BAN::UniqPtr<Kernel::DMARegion> m_submission_queue;
|
||||
volatile NVMe::DoorbellRegisters& m_doorbell;
|
||||
const uint32_t m_qdepth;
|
||||
uint32_t m_sq_tail { 0 };
|
||||
uint32_t m_cq_head { 0 };
|
||||
uint16_t m_cq_valid_phase { 1 };
|
||||
|
||||
Semaphore m_semaphore;
|
||||
volatile uint16_t m_status;
|
||||
volatile bool m_done { false };
|
||||
};
|
||||
|
||||
}
|
|
@ -6,6 +6,7 @@
|
|||
#include <kernel/PCI.h>
|
||||
#include <kernel/Storage/ATA/AHCI/Controller.h>
|
||||
#include <kernel/Storage/ATA/ATAController.h>
|
||||
#include <kernel/Storage/NVMe/Controller.h>
|
||||
|
||||
#define INVALID_VENDOR 0xFFFF
|
||||
#define MULTI_FUNCTION 0x80
|
||||
|
@ -170,6 +171,14 @@ namespace Kernel::PCI
|
|||
if (auto res = ATAController::create(pci_device); res.is_error())
|
||||
dprintln("ATA: {}", res.error());
|
||||
break;
|
||||
case 0x08:
|
||||
// FIXME: HACK if inode initialization fails before it attaches to DevFS,
|
||||
// it will kernel panic. This is used to make nvme eternal
|
||||
if (auto res = NVMeController::create(pci_device); res.is_error())
|
||||
dprintln("NVMe: {}", res.error());
|
||||
else
|
||||
res.value()->ref();
|
||||
break;
|
||||
default:
|
||||
dprintln("unsupported storage device (pci {2H}.{2H}.{2H})", pci_device.class_code(), pci_device.subclass(), pci_device.prog_if());
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,310 @@
|
|||
#include <BAN/Array.h>
|
||||
#include <kernel/FS/DevFS/FileSystem.h>
|
||||
#include <kernel/Memory/DMARegion.h>
|
||||
#include <kernel/Storage/NVMe/Controller.h>
|
||||
#include <kernel/Timer/Timer.h>
|
||||
|
||||
#include <sys/sysmacros.h>
|
||||
|
||||
#define DEBUG_NVMe 1
|
||||
|
||||
namespace Kernel
|
||||
{
|
||||
|
||||
static dev_t get_ctrl_dev_major()
|
||||
{
|
||||
static dev_t major = DevFileSystem::get().get_next_dev();
|
||||
return major;
|
||||
}
|
||||
|
||||
static dev_t get_ctrl_dev_minor()
|
||||
{
|
||||
static dev_t minor = 0;
|
||||
return minor++;
|
||||
}
|
||||
|
||||
BAN::ErrorOr<BAN::RefPtr<StorageController>> NVMeController::create(PCI::Device& pci_device)
|
||||
{
|
||||
auto* controller_ptr = new NVMeController(pci_device);
|
||||
if (controller_ptr == nullptr)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
auto controller = BAN::RefPtr<StorageController>::adopt(controller_ptr);
|
||||
TRY(controller->initialize());
|
||||
return controller;
|
||||
}
|
||||
|
||||
NVMeController::NVMeController(PCI::Device& pci_device)
|
||||
: CharacterDevice(0600, 0, 0)
|
||||
, m_pci_device(pci_device)
|
||||
, m_rdev(makedev(get_ctrl_dev_major(), get_ctrl_dev_minor()))
|
||||
{
|
||||
ASSERT(minor(m_rdev) < 10);
|
||||
strcpy(m_name, "nvmeX");
|
||||
m_name[4] = '0' + minor(m_rdev);
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeController::initialize()
|
||||
{
|
||||
// See NVM express base specification section 3.5.1
|
||||
m_pci_device.enable_bus_mastering();
|
||||
m_pci_device.enable_memory_space();
|
||||
|
||||
m_bar0 = TRY(m_pci_device.allocate_bar_region(0));
|
||||
if (m_bar0->type() != PCI::BarType::MEM)
|
||||
{
|
||||
dwarnln("NVMe controller BAR0 is not MEM");
|
||||
return BAN::Error::from_errno(EINVAL);
|
||||
}
|
||||
if (m_bar0->size() < 0x1000)
|
||||
{
|
||||
dwarnln("NVMe controller BAR0 is too small {} bytes", m_bar0->size());
|
||||
return BAN::Error::from_errno(EINVAL);
|
||||
}
|
||||
|
||||
m_controller_registers = reinterpret_cast<volatile NVMe::ControllerRegisters*>(m_bar0->vaddr());
|
||||
|
||||
const auto& vs = m_controller_registers->vs;
|
||||
if (vs.major != 1)
|
||||
{
|
||||
dwarnln("NVMe controller has unsupported version {}.{}", (uint16_t)vs.major, (uint8_t)vs.minor);
|
||||
return BAN::Error::from_errno(ENOTSUP);
|
||||
}
|
||||
|
||||
dprintln_if(DEBUG_NVMe, "NVMe controller");
|
||||
dprintln_if(DEBUG_NVMe, " version: {}.{}", (uint16_t)vs.major, (uint8_t)vs.minor);
|
||||
|
||||
auto& cap = m_controller_registers->cap;
|
||||
if (!(cap.css & NVMe::CAP_CSS_NVME))
|
||||
{
|
||||
dwarnln("NVMe controller does not support NVMe command set");
|
||||
return BAN::Error::from_errno(ECANCELED);
|
||||
}
|
||||
|
||||
const uint64_t min_page_size = 1ull << (12 + cap.mpsmin);
|
||||
const uint64_t max_page_size = 1ull << (12 + cap.mpsmax);
|
||||
if (PAGE_SIZE < min_page_size || PAGE_SIZE > max_page_size)
|
||||
{
|
||||
dwarnln("NVMe controller does not support {} byte pages, only {}-{} byte pages are supported", PAGE_SIZE, min_page_size, max_page_size);
|
||||
return BAN::Error::from_errno(ECANCELED);
|
||||
}
|
||||
|
||||
// One for aq and one for ioq
|
||||
TRY(m_pci_device.reserve_irqs(2));
|
||||
|
||||
auto& cc = m_controller_registers->cc;
|
||||
|
||||
if (cc.en)
|
||||
TRY(wait_until_ready(true));
|
||||
cc.en = 0;
|
||||
TRY(wait_until_ready(false));
|
||||
dprintln_if(DEBUG_NVMe, " controller reset");
|
||||
|
||||
TRY(create_admin_queue());
|
||||
dprintln_if(DEBUG_NVMe, " created admin queue");
|
||||
|
||||
// Configure controller
|
||||
cc.ams = 0;
|
||||
cc.mps = PAGE_SIZE_SHIFT - 12;
|
||||
cc.css = 0b000;
|
||||
|
||||
cc.en = 1;
|
||||
TRY(wait_until_ready(true));
|
||||
dprintln_if(DEBUG_NVMe, " controller enabled");
|
||||
|
||||
TRY(identify_controller());
|
||||
|
||||
cc.iocqes = 4; static_assert(1 << 4 == sizeof(NVMe::CompletionQueueEntry));
|
||||
cc.iosqes = 6; static_assert(1 << 6 == sizeof(NVMe::SubmissionQueueEntry));
|
||||
TRY(create_io_queue());
|
||||
dprintln_if(DEBUG_NVMe, " created io queue");
|
||||
|
||||
TRY(identify_namespaces());
|
||||
|
||||
DevFileSystem::get().add_device(this);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeController::wait_until_ready(bool expected_value)
|
||||
{
|
||||
const auto& cap = m_controller_registers->cap;
|
||||
const auto& csts = m_controller_registers->csts;
|
||||
|
||||
uint64_t timeout = SystemTimer::get().ms_since_boot() + 500 * cap.to;
|
||||
while (csts.rdy != expected_value)
|
||||
{
|
||||
if (SystemTimer::get().ms_since_boot() >= timeout)
|
||||
{
|
||||
dwarnln("NVMe controller reset timedout");
|
||||
return BAN::Error::from_errno(ETIMEDOUT);
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeController::identify_controller()
|
||||
{
|
||||
auto dma_page = TRY(DMARegion::create(PAGE_SIZE));
|
||||
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_ADMIN_IDENTIFY;
|
||||
sqe.identify.dptr.prp1 = dma_page->paddr();
|
||||
sqe.identify.cns = NVMe::CNS_INDENTIFY_CONTROLLER;
|
||||
if (uint16_t status = m_admin_queue->submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe controller identify failed (status {4H})", status);
|
||||
return BAN::Error::from_errno(EFAULT);
|
||||
}
|
||||
|
||||
if (*reinterpret_cast<uint16_t*>(dma_page->vaddr()) != m_pci_device.vendor_id())
|
||||
{
|
||||
dwarnln("NVMe controller vendor id does not match with the one in PCI");
|
||||
return BAN::Error::from_errno(EFAULT);
|
||||
}
|
||||
|
||||
dprintln_if(DEBUG_NVMe, " model: '{}'", BAN::StringView { (char*)dma_page->vaddr() + 24, 20 });
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeController::identify_namespaces()
|
||||
{
|
||||
auto dma_page = TRY(DMARegion::create(PAGE_SIZE));
|
||||
|
||||
BAN::Vector<uint32_t> namespace_ids;
|
||||
TRY(namespace_ids.resize(PAGE_SIZE / sizeof(uint32_t)));
|
||||
|
||||
{
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_ADMIN_IDENTIFY;
|
||||
sqe.identify.dptr.prp1 = dma_page->paddr();
|
||||
sqe.identify.cns = NVMe::CNS_INDENTIFY_ACTIVE_NAMESPACES;
|
||||
if (uint16_t status = m_admin_queue->submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe active namespace identify failed (status {4H})", status);
|
||||
return BAN::Error::from_errno(EFAULT);
|
||||
}
|
||||
memcpy(namespace_ids.data(), reinterpret_cast<void*>(dma_page->vaddr()), PAGE_SIZE);
|
||||
}
|
||||
|
||||
for (uint32_t nsid : namespace_ids)
|
||||
{
|
||||
if (nsid == 0)
|
||||
break;
|
||||
dprintln(" found namespace {}", nsid);
|
||||
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_ADMIN_IDENTIFY;
|
||||
sqe.identify.nsid = nsid;
|
||||
sqe.identify.dptr.prp1 = dma_page->paddr();
|
||||
sqe.identify.cns = NVMe::CNS_INDENTIFY_NAMESPACE;
|
||||
if (uint16_t status = m_admin_queue->submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe namespace {} identify failed (status {4H})", nsid , status);
|
||||
return BAN::Error::from_errno(EFAULT);
|
||||
}
|
||||
|
||||
auto& namespace_info = *reinterpret_cast<volatile NVMe::NamespaceIdentify*>(dma_page->vaddr());
|
||||
|
||||
const uint64_t block_count = namespace_info.nsze;
|
||||
|
||||
const uint64_t format = namespace_info.lbafN[namespace_info.flbas & 0x0F];
|
||||
const uint64_t block_size = 1u << ((format >> 16) & 0xFF);
|
||||
|
||||
dprintln(" block count {}", block_count);
|
||||
dprintln(" block size {} B", block_size);
|
||||
dprintln(" total {} MiB", block_count * block_size / (1 << 20));
|
||||
|
||||
auto ns = TRY(NVMeNamespace::create(*this, nsid, block_count, block_size));
|
||||
TRY(m_namespaces.push_back(BAN::move(ns)));
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeController::create_admin_queue()
|
||||
{
|
||||
const uint32_t admin_queue_depth = BAN::Math::min(PAGE_SIZE / sizeof(NVMe::CompletionQueueEntry), PAGE_SIZE / sizeof(NVMe::SubmissionQueueEntry));
|
||||
auto& aqa = m_controller_registers->aqa;
|
||||
aqa.acqs = admin_queue_depth - 1;
|
||||
aqa.asqs = admin_queue_depth - 1;
|
||||
dprintln_if(DEBUG_NVMe, " admin queue depth is {}", admin_queue_depth);
|
||||
|
||||
const uint32_t completion_queue_size = admin_queue_depth * sizeof(NVMe::CompletionQueueEntry);
|
||||
auto completion_queue = TRY(DMARegion::create(completion_queue_size));
|
||||
memset((void*)completion_queue->vaddr(), 0x00, completion_queue->size());
|
||||
|
||||
const uint32_t submission_queue_size = admin_queue_depth * sizeof(NVMe::SubmissionQueueEntry);
|
||||
auto submission_queue = TRY(DMARegion::create(submission_queue_size));
|
||||
memset((void*)submission_queue->vaddr(), 0x00, submission_queue->size());
|
||||
|
||||
m_controller_registers->acq = completion_queue->paddr();
|
||||
m_controller_registers->asq = submission_queue->paddr();
|
||||
|
||||
uint8_t irq = m_pci_device.get_irq(0);
|
||||
dprintln_if(DEBUG_NVMe, " admin queue using irq {}", irq);
|
||||
|
||||
auto& doorbell = *reinterpret_cast<volatile NVMe::DoorbellRegisters*>(m_bar0->vaddr() + NVMe::ControllerRegisters::SQ0TDBL);
|
||||
|
||||
m_admin_queue = TRY(BAN::UniqPtr<NVMeQueue>::create(BAN::move(completion_queue), BAN::move(submission_queue), doorbell, admin_queue_depth, irq));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeController::create_io_queue()
|
||||
{
|
||||
constexpr uint32_t queue_size = PAGE_SIZE;
|
||||
constexpr uint32_t queue_elems = queue_size / BAN::Math::max(sizeof(NVMe::CompletionQueueEntry), sizeof(NVMe::SubmissionQueueEntry));
|
||||
auto completion_queue = TRY(DMARegion::create(queue_size));
|
||||
memset((void*)completion_queue->vaddr(), 0x00, completion_queue->size());
|
||||
|
||||
auto submission_queue = TRY(DMARegion::create(queue_size));
|
||||
memset((void*)submission_queue->vaddr(), 0x00, submission_queue->size());
|
||||
|
||||
{
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_ADMIN_CREATE_CQ;
|
||||
sqe.create_cq.dptr.prp1 = completion_queue->paddr();
|
||||
sqe.create_cq.qsize = queue_elems - 1;
|
||||
sqe.create_cq.qid = 1;
|
||||
sqe.create_cq.iv = 1;
|
||||
sqe.create_cq.ien = 1;
|
||||
sqe.create_cq.pc = 1;
|
||||
if (uint16_t status = m_admin_queue->submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe io completion queue creation failed (status {4H})", status);
|
||||
return BAN::Error::from_errno(EFAULT);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_ADMIN_CREATE_SQ;
|
||||
sqe.create_sq.dptr.prp1 = submission_queue->paddr();
|
||||
sqe.create_sq.qsize = queue_elems - 1;
|
||||
sqe.create_sq.qid = 1;
|
||||
sqe.create_sq.cqid = 1;
|
||||
sqe.create_sq.qprio = 0;
|
||||
sqe.create_sq.pc = 1;
|
||||
sqe.create_sq.nvmsetid = 0;
|
||||
if (uint16_t status = m_admin_queue->submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe io submission queue creation failed (status {4H})", status);
|
||||
return BAN::Error::from_errno(EFAULT);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t irq = m_pci_device.get_irq(1);
|
||||
dprintln_if(DEBUG_NVMe, " io queue using irq {}", irq);
|
||||
|
||||
const uint32_t doorbell_stride = 1 << (2 + m_controller_registers->cap.dstrd);
|
||||
const uint32_t doorbell_offset = 2 * doorbell_stride;
|
||||
auto& doorbell = *reinterpret_cast<volatile NVMe::DoorbellRegisters*>(m_bar0->vaddr() + NVMe::ControllerRegisters::SQ0TDBL + doorbell_offset);
|
||||
|
||||
m_io_queue = TRY(BAN::UniqPtr<NVMeQueue>::create(BAN::move(completion_queue), BAN::move(submission_queue), doorbell, queue_elems, irq));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
#include <kernel/FS/DevFS/FileSystem.h>
|
||||
#include <kernel/Storage/NVMe/Controller.h>
|
||||
#include <kernel/Storage/NVMe/Namespace.h>
|
||||
|
||||
#include <sys/sysmacros.h>
|
||||
|
||||
namespace Kernel
|
||||
{
|
||||
|
||||
static dev_t get_ns_dev_major()
|
||||
{
|
||||
static dev_t major = DevFileSystem::get().get_next_dev();
|
||||
return major;
|
||||
}
|
||||
|
||||
static dev_t get_ns_dev_minor()
|
||||
{
|
||||
static dev_t minor = 0;
|
||||
return minor++;
|
||||
}
|
||||
|
||||
BAN::ErrorOr<BAN::RefPtr<NVMeNamespace>> NVMeNamespace::create(NVMeController& controller, uint32_t nsid, uint64_t block_count, uint32_t block_size)
|
||||
{
|
||||
auto* namespace_ptr = new NVMeNamespace(controller, nsid, block_count, block_size);
|
||||
if (namespace_ptr == nullptr)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
auto ns = BAN::RefPtr<NVMeNamespace>::adopt(namespace_ptr);
|
||||
TRY(ns->initialize());
|
||||
return ns;
|
||||
}
|
||||
|
||||
NVMeNamespace::NVMeNamespace(NVMeController& controller, uint32_t nsid, uint64_t block_count, uint32_t block_size)
|
||||
: m_controller(controller)
|
||||
, m_nsid(nsid)
|
||||
, m_block_size(block_size)
|
||||
, m_block_count(block_count)
|
||||
, m_rdev(makedev(get_ns_dev_major(), get_ns_dev_minor()))
|
||||
{
|
||||
ASSERT(minor(m_rdev) < 10);
|
||||
ASSERT(m_controller.name().size() + 2 < sizeof(m_name));
|
||||
memcpy(m_name, m_controller.name().data(), m_controller.name().size());
|
||||
m_name[m_controller.name().size() + 0] = 'n';
|
||||
m_name[m_controller.name().size() + 1] = '1' + minor(m_rdev);
|
||||
m_name[m_controller.name().size() + 2] = '\0';
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeNamespace::initialize()
|
||||
{
|
||||
m_dma_region = TRY(DMARegion::create(PAGE_SIZE));
|
||||
|
||||
add_disk_cache();
|
||||
|
||||
DevFileSystem::get().add_device(this);
|
||||
|
||||
char name_prefix[20];
|
||||
strcpy(name_prefix, m_name);
|
||||
strcat(name_prefix, "p");
|
||||
if (auto res = initialize_partitions(name_prefix); res.is_error())
|
||||
dprintln("{}", res.error());
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeNamespace::read_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ByteSpan buffer)
|
||||
{
|
||||
ASSERT(buffer.size() >= sector_count * m_block_size);
|
||||
|
||||
for (uint64_t i = 0; i < sector_count;)
|
||||
{
|
||||
uint16_t count = BAN::Math::min(sector_count - i, m_dma_region->size() / m_block_size);
|
||||
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_IO_READ;
|
||||
sqe.read.nsid = m_nsid;
|
||||
sqe.read.dptr.prp1 = m_dma_region->paddr();
|
||||
sqe.read.slba = lba + i;
|
||||
sqe.read.nlb = count - 1;
|
||||
if (uint16_t status = m_controller.io_queue().submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe read failed (status {4H})", status);
|
||||
return BAN::Error::from_errno(EIO);
|
||||
}
|
||||
memcpy(buffer.data() + i * m_block_size, reinterpret_cast<void*>(m_dma_region->vaddr()), count * m_block_size);
|
||||
|
||||
i += count;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> NVMeNamespace::write_sectors_impl(uint64_t lba, uint64_t sector_count, BAN::ConstByteSpan buffer)
|
||||
{
|
||||
ASSERT(buffer.size() >= sector_count * m_block_size);
|
||||
|
||||
for (uint64_t i = 0; i < sector_count;)
|
||||
{
|
||||
uint16_t count = BAN::Math::min(sector_count - i, m_dma_region->size() / m_block_size);
|
||||
|
||||
memcpy(reinterpret_cast<void*>(m_dma_region->vaddr()), buffer.data() + i * m_block_size, count * m_block_size);
|
||||
|
||||
NVMe::SubmissionQueueEntry sqe {};
|
||||
sqe.opc = NVMe::OPC_IO_WRITE;
|
||||
sqe.read.nsid = m_nsid;
|
||||
sqe.read.dptr.prp1 = m_dma_region->paddr();
|
||||
sqe.read.slba = lba + i;
|
||||
sqe.read.nlb = count - 1;
|
||||
if (uint16_t status = m_controller.io_queue().submit_command(sqe))
|
||||
{
|
||||
dwarnln("NVMe write failed (status {4H})", status);
|
||||
return BAN::Error::from_errno(EIO);
|
||||
}
|
||||
|
||||
i += count;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
#include <kernel/LockGuard.h>
|
||||
#include <kernel/Scheduler.h>
|
||||
#include <kernel/Storage/NVMe/Queue.h>
|
||||
#include <kernel/Timer/Timer.h>
|
||||
|
||||
namespace Kernel
|
||||
{
|
||||
|
||||
static constexpr uint64_t s_nvme_command_timeout_ms = 1000;
|
||||
static constexpr uint64_t s_nvme_command_poll_timeout_ms = 20;
|
||||
|
||||
NVMeQueue::NVMeQueue(BAN::UniqPtr<Kernel::DMARegion>&& cq, BAN::UniqPtr<Kernel::DMARegion>&& sq, volatile NVMe::DoorbellRegisters& db, uint32_t qdepth, uint8_t irq)
|
||||
: m_completion_queue(BAN::move(cq))
|
||||
, m_submission_queue(BAN::move(sq))
|
||||
, m_doorbell(db)
|
||||
, m_qdepth(qdepth)
|
||||
{
|
||||
set_irq(irq);
|
||||
enable_interrupt();
|
||||
}
|
||||
|
||||
void NVMeQueue::handle_irq()
|
||||
{
|
||||
auto* cq_ptr = reinterpret_cast<NVMe::CompletionQueueEntry*>(m_completion_queue->vaddr());
|
||||
|
||||
while ((cq_ptr[m_cq_head].sts & 1) == m_cq_valid_phase)
|
||||
{
|
||||
uint16_t sts = cq_ptr[m_cq_head].sts >> 1;
|
||||
uint16_t cid = cq_ptr[m_cq_head].cid;
|
||||
ASSERT(cid == 0);
|
||||
|
||||
ASSERT(!m_done);
|
||||
m_status = sts;
|
||||
m_done = true;
|
||||
m_semaphore.unblock();
|
||||
|
||||
m_cq_head = (m_cq_head + 1) % m_qdepth;
|
||||
if (m_cq_head == 0)
|
||||
m_cq_valid_phase ^= 1;
|
||||
}
|
||||
|
||||
m_doorbell.cq_head = m_cq_head;
|
||||
}
|
||||
|
||||
uint16_t NVMeQueue::submit_command(NVMe::SubmissionQueueEntry& sqe)
|
||||
{
|
||||
LockGuard _(m_lock);
|
||||
|
||||
ASSERT(m_done == false);
|
||||
m_status = 0;
|
||||
|
||||
sqe.cid = 0;
|
||||
|
||||
auto* sqe_ptr = reinterpret_cast<NVMe::SubmissionQueueEntry*>(m_submission_queue->vaddr());
|
||||
memcpy(&sqe_ptr[m_sq_tail], &sqe, sizeof(NVMe::SubmissionQueueEntry));
|
||||
m_sq_tail = (m_sq_tail + 1) % m_qdepth;
|
||||
m_doorbell.sq_tail = m_sq_tail;
|
||||
|
||||
const uint64_t start_time = SystemTimer::get().ms_since_boot();
|
||||
while (SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_poll_timeout_ms)
|
||||
{
|
||||
if (!m_done)
|
||||
continue;
|
||||
m_done = false;
|
||||
return m_status;
|
||||
}
|
||||
|
||||
while (SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_timeout_ms)
|
||||
{
|
||||
if (!m_done)
|
||||
{
|
||||
m_semaphore.block();
|
||||
continue;
|
||||
}
|
||||
m_done = false;
|
||||
return m_status;
|
||||
}
|
||||
|
||||
return 0xFFFF;
|
||||
}
|
||||
|
||||
}
|
|
@ -13,11 +13,16 @@ if (($BANAN_UEFI_BOOT)); then
|
|||
BIOS_ARGS="-bios $OVMF_PATH -net none"
|
||||
fi
|
||||
|
||||
if [[ $BANAN_DISK_TYPE == "NVME" ]]; then
|
||||
DISK_ARGS="-device nvme,serial=deadbeef,drive=disk"
|
||||
else
|
||||
DISK_ARGS="-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0"
|
||||
fi
|
||||
|
||||
qemu-system-$BANAN_ARCH \
|
||||
-m 128 \
|
||||
-smp 2 \
|
||||
$BIOS_ARGS \
|
||||
-drive format=raw,id=disk,file=${BANAN_DISK_IMAGE_PATH},if=none \
|
||||
-device ahci,id=ahci \
|
||||
-device ide-hd,drive=disk,bus=ahci.0 \
|
||||
$DISK_ARGS \
|
||||
$@ \
|
||||
|
|
Loading…
Reference in New Issue