diff --git a/Driver/pcie40_driver/daq_emu.c b/Driver/pcie40_driver/daq_emu.c new file mode 100644 index 0000000000000000000000000000000000000000..ea5722f81dda7e81ecae182faeae8e30a2789c46 --- /dev/null +++ b/Driver/pcie40_driver/daq_emu.c @@ -0,0 +1,494 @@ +//p40emu``+ +#define P40_FMT "P40DAQemu:%s(): " +#define PCIE40_DAQ_CLASS "lhcb_pcie40_daq_emu" +#define PCIE40_EMU + +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/types.h> + +struct pcie40_dma_map; +static int dma_map_emu_alloc(struct pcie40_dma_map* map, void *base, size_t max_entries, size_t desired_size); +static int dma_map_emu_free(struct pcie40_dma_map* map); + +#include "daq.h" + +extern int mpf; + +static int dma_map_emu_alloc(struct pcie40_dma_map* map, void *base, size_t max_entries, size_t desired_size) +{ + int i; + + map->base = base; + map->max_entries = max_entries; + map->num_entries = 0; + map->size = 0; + map->entries = kzalloc(sizeof(struct pcie40_dma_buffer)*max_entries, GFP_KERNEL); + + printk(P40_DIAG "entries = 0x%p\n", P40_PARM, map->entries); + + for (i = 0; i < map->max_entries; ++i) { + size_t alloc_size = desired_size - map->size > PCI_MAX_ALLOC + ? PCI_MAX_ALLOC + : desired_size - map->size; + while (1) { + struct pcie40_dma_buffer* buffer = map->entries + i; + buffer->size = alloc_size; + buffer->ptr = kzalloc(buffer->size, GFP_KERNEL); + //printk("...%zu", buffer->size); + buffer->start = virt_to_phys(buffer->ptr); + if (buffer->ptr == NULL) { + if (buffer->size < 1024*1024) { + printk("...NOT ENOUGH MEMORY!\n"); + map->num_entries = i; + dma_map_emu_free(map); + return -1; + } + alloc_size /= 2; + } else { + map->size += buffer->size; + break; + } + } + + if (map->size >= desired_size) { + ++i; + break; + } + } + map->num_entries = i; + + return 0; +} + +static int dma_map_emu_free(struct pcie40_dma_map* map) +{ + int i; + for (i = 0; i < map->num_entries; ++i) { + struct pcie40_dma_buffer* buffer = map->entries + i; + kfree(buffer->ptr); + } + kfree(map->entries); + map->entries = NULL; + + return 0; +} + +static void dma_stream_set_write_off(struct pcie40_dma_stream *stream, uint32_t write_off) +{ + //TODO: spin lock to serialize access! + stream->write_off = write_off; + pcie40_write32_stream(stream, P40_DMA_DAQ_STREAM_OFF_HOST_BUF_WRITE_OFF, write_off); +} + +//+`dma_stream_emu_write` Emulate a dma write into a given stream. +static ssize_t dma_stream_emu_write( + struct pcie40_dma_stream *stream, int *map_idx, uint32_t *buf_off, const void *from, size_t bytes)//;?> +//>`stream` Stream to write to. +//><`map_idx` Index of current DMA buffer, must be initialized to 0. +//><`buf_off` Offset within current DMA buffer, must be initialized to 0. +//>`from` Pointer to data to write, if NULL, zeroes will be written instead. +//>`bytes` Number of bytes to write. +{ + uint32_t write_off, write_off_next; + ssize_t bytes_written = 0; + //? The write is started only if sufficient space is available. + size_t bytes_free = dma_stream_get_bytes_free(stream); + if (bytes_free <= bytes) { + //? If not, a negative value is immediately returned. Its magnitude is the number of bytes missing. + return bytes_free - bytes; + } + // if (write_off + 32 == (read_off & ~0x1F)) + + //? A write is split across multiple DMA buffers if the current one does not have sufficient space available. + while (bytes_written < bytes) { + struct pcie40_dma_buffer *buf = stream->map.entries + *map_idx; + size_t bytes_left = bytes - bytes_written; + if (*buf_off + bytes_left <= buf->size) { + if (from) { + memcpy(buf->ptr + *buf_off, from + bytes_written, bytes_left); + } else { + memset(buf->ptr + *buf_off, 0, bytes_left); + } + bytes_written += bytes_left; + *buf_off += bytes_left; + } else { + size_t bytes_avail = buf->size - *buf_off; + if (from) { + memcpy(buf->ptr + *buf_off, from + bytes_written, bytes_avail); + } else { + memset(buf->ptr + *buf_off, 0, bytes_avail); + } + *map_idx = (*map_idx + 1) % stream->map.num_entries; + bytes_written += bytes_avail; + *buf_off = 0; + } + } + spin_lock_irqsave(&stream->off_lock, stream->off_flags); + write_off = dma_stream_get_write_off(stream); + write_off_next = (write_off + bytes) % stream->map.size; + dma_stream_set_write_off(stream, write_off_next); + spin_unlock_irqrestore(&stream->off_lock, stream->off_flags); + return bytes; +} + +//+`daq_emu_thread` Thread loop generating emulated board data. +//>`data` Opaque pointer to interface state +static int daq_emu_thread(void *data) //?> +{ + struct pcie40_daq_state *state = data; + uint64_t evid_frg = 0; + int main_buf = 0; + uint32_t main_off = 0; + int meta_buf = 0; + uint32_t meta_off = 0; + int meta_idx = 0; + ssize_t write_status = 0; + enum { GEN_MHDR, GEN_FRG, GEN_META, GEN_MPAD } gen_state = GEN_MHDR; + + //? One instance of this thread is spawned for each emulated interface. + printk(P40_INFO "starting emulator thread for interface %d\n", + P40_PARM, state->common->dev_id); + + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_META_PACKING, mpf); + + //? The thread loops until the module is unloaded. <? + while (!kthread_should_stop()) { //... } ?> + uint32_t mmr_reset, mmr_main_gen_ctl, mmr_main_gen_fixed, mmr_main_raw_mode, mmr_main_enable, mmr_meta_enable; + + struct pcie40_dma_buffer *main_buffer = state->main_stream.map.entries + main_buf; + struct pcie40_dma_buffer *meta_buffer = state->meta_stream.map.entries + meta_buf; + + //? Every loop iteration reads the emulated control registers <? + mmr_reset = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_RESET); + mmr_main_gen_ctl = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL); + mmr_main_gen_fixed = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_FIXED); + mmr_main_raw_mode = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_RAW_MODE); + mmr_main_enable = pcie40_read32_stream(&state->main_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE); + mmr_meta_enable = pcie40_read32_stream(&state->meta_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE); + mpf = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_META_PACKING); + //?>Then uses these values to drive its behaviour for the rest of the current emulation cycle. + + if (mmr_reset & (1 << P40_RST_BIT_DEFAULT)) { + printk(P40_INFO "%d: reset default\n", P40_PARM, state->common->dev_id); + + mmr_main_gen_ctl &= ~(1 << P40_MAIN_GEN_BIT_ENABLE); + mmr_main_gen_ctl &= ~(1 << P40_MAIN_GEN_BIT_FIXED); + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL, mmr_main_gen_ctl); + + mmr_main_gen_fixed = 0x6243484C; + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL, mmr_main_gen_fixed); + + mmr_main_enable = 1; + pcie40_write32_stream(&state->main_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE, mmr_main_enable); + mmr_meta_enable = 0; + pcie40_write32_stream(&state->meta_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE, mmr_meta_enable); + + mmr_reset |= (1 << P40_RST_BIT_LOGIC); + } + if (mmr_reset & (1 << P40_RST_BIT_LOGIC)) { + printk(P40_INFO "%d: reset logic\n", P40_PARM, state->common->dev_id); + + mmr_main_gen_ctl &= ~(1 << P40_MAIN_GEN_BIT_RUNNING); + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL, mmr_main_gen_ctl); + + dma_stream_set_read_off(&state->main_stream, 0); + dma_stream_set_write_off(&state->main_stream, 0); + + dma_stream_set_read_off(&state->meta_stream, 0); + dma_stream_set_write_off(&state->meta_stream, 0); + + gen_state = GEN_MHDR; + } + if (mmr_reset & (1 << P40_RST_BIT_FLUSH)) { + printk(P40_INFO "%d: reset flush\n", P40_PARM, state->common->dev_id); + } + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_RESET, 0); + +retry: + //Jump to this point to retry the last operation without re-reading the registers + // (useful in the middle of flushing, once the reset register has been zeroed but we're not done flushing yet) + + if (!(mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_ENABLE))) { + evid_frg = 0; + main_buf = 0; + main_off = 0; + meta_buf = 0; + meta_off = 0; + meta_idx = 0; + gen_state = GEN_MHDR; + } + + if ((mmr_main_enable == 1) && (mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_ENABLE))) { + + struct __attribute__((__packed__)) { + uint64_t evid; + uint32_t ghdr; + uint8_t data[128 - sizeof(uint64_t) - sizeof(uint32_t)]; //Must be aligned to 32 bytes + } frg_nometa; + + struct __attribute__((__packed__)) { + char magic[6]; + uint16_t frags; + uint64_t evid; + uint64_t offset; + } meta_hdr; + + struct __attribute__((__packed__)) { + uint32_t ghdr; + uint8_t data[104 - sizeof(uint32_t)]; //Must be aligned to 8 bytes + } frg; + + uint16_t meta; + size_t meta_size = sizeof(meta_hdr) + mpf * sizeof(meta); + size_t meta_left = (mpf - meta_idx) * sizeof(meta); + size_t meta_pad = meta_size % 8 ? (meta_size - meta_size % 8) : 0; //Metadata is also aligned to 8 bytes + + if (mmr_reset & (1 << P40_RST_BIT_FLUSH)) { + if (mmr_meta_enable) { + switch (gen_state) { + case GEN_MHDR: + // Just generate a dummy header with an evid of 0xFF... + evid_frg = 0; + meta_idx = 0; + memcpy(meta_hdr.magic, "META40", sizeof(meta_hdr.magic)); + meta_hdr.evid = 0xFFFFFFFFFFFFFFFF; + meta_hdr.offset = main_off; + write_status = dma_stream_emu_write(&state->meta_stream, + &meta_buf, &meta_off, &meta_hdr, sizeof(meta_hdr)); + if (write_status < 0) { + msleep_interruptible(1); + goto retry; + } + break; + case GEN_FRG: + //Before the flush we were supposed to emit a fragment, now we aren't anymore + //just fallthrough to the next case + case GEN_META: + //Also nothing to do here, no new fragments to create metadata for + //just fallthrough to the next case + case GEN_MPAD: + write_status = dma_stream_emu_write(&state->meta_stream, + &meta_buf, &meta_off, NULL, meta_left + meta_pad); + if (write_status >= 0) { + msleep_interruptible(1); + goto retry; + } + break; + } + } else { + frg_nometa.evid = 0xFFFFFFFFFFFFFFFF; + frg_nometa.ghdr = ((100 * 8) & 0xFFFFF) | ((evid_frg & 0xFFF) << 20); //Make them 100 bytes + memset(frg_nometa.data, 0, sizeof(frg_nometa.data)); + write_status = dma_stream_emu_write(&state->main_stream, + &main_buf, &main_off, &frg_nometa, sizeof(frg_nometa)); + if (write_status > 0) { + evid_frg = 0; + } + } + } else //Not flushing + if (mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_RUNNING)) { + if (mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_FIXED)) { + // Generate fixed pattern + uint32_t pattern[4]; + mmr_main_raw_mode = 1; + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_RAW_MODE, mmr_main_raw_mode); + //Because the FPGA writes payloads in BE + pattern[0] = pattern[1] = pattern[2] = pattern[3] = cpu_to_be32(mmr_main_gen_fixed); + + write_status = dma_stream_emu_write(&state->main_stream, + &main_buf, &main_off, &pattern, sizeof(pattern)); + } else { + // Generate fragments + mmr_main_raw_mode = 0; + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_RAW_MODE, mmr_main_raw_mode); + + // If the metadata stream is enabled, use it for metadata + if (mmr_meta_enable) { + switch (gen_state) { + case GEN_MHDR: + meta_idx = 0; + memcpy(meta_hdr.magic, "META40", sizeof(meta_hdr.magic)); + meta_hdr.frags = mpf; + meta_hdr.evid = evid_frg; + meta_hdr.offset = main_off; + write_status = dma_stream_emu_write(&state->meta_stream, + &meta_buf, &meta_off, &meta_hdr, sizeof(meta_hdr)); + if (write_status > 0) { + gen_state = GEN_FRG; + } + break; + case GEN_FRG: + frg.ghdr = ((100 * 8) & 0xFFFFF) | ((evid_frg & 0xFFF) << 20); //Make them 100 bytes + memset(frg.data, (uint8_t)evid_frg, sizeof(frg.data)); + write_status = dma_stream_emu_write(&state->main_stream, + &main_buf, &main_off, &frg, sizeof(frg)); + if (write_status > 0) { + ++meta_idx; + gen_state = GEN_META; + } + break; + case GEN_META: + meta = (frg.ghdr & 0xFFFFF) / 8; //Turn into bytes and pad to 8 + if (meta % 8) meta += 8 - meta % 8; + write_status = dma_stream_emu_write(&state->meta_stream, + &meta_buf, &meta_off, &meta, sizeof(meta)); + if (write_status > 0) { + if (meta_idx < mpf) { + gen_state = GEN_FRG; + } else { + gen_state = GEN_MPAD; + } + } + break; + case GEN_MPAD: + write_status = dma_stream_emu_write(&state->meta_stream, + &meta_buf, &meta_off, NULL, meta_pad); + if (write_status >= 0) { + gen_state = GEN_MHDR; + } + break; + } + } else { + // Otherwise generate only fragments + frg_nometa.evid = evid_frg; + frg_nometa.ghdr = ((100 * 8) & 0xFFFFF) | ((evid_frg & 0xFFF) << 20); //Make them 100 bytes + memset(frg_nometa.data, (uint8_t)evid_frg, sizeof(frg_nometa.data)); + write_status = dma_stream_emu_write(&state->main_stream, + &main_buf, &main_off, &frg_nometa, sizeof(frg_nometa)); + if (write_status > 0) { + ++evid_frg; + } + } + } + } + } + //XXX: Apparently we soft-lockup the kernel if we do not yield at least from time to time? + if (write_status < 0) { + msleep_interruptible(1); + } else { + yield(); + } + } + + printk(P40_INFO "stopping emulator thread for interface %d\n", + P40_PARM, state->common->dev_id); + return 0; +} + +int pcie40_daq_emu_probe(struct pcie40_state *common) +{ + int rc = 0; + + struct pcie40_daq_state *state = NULL; + uint32_t regmap_version, fpga_version; + + state = kzalloc(sizeof(struct pcie40_daq_state), GFP_KERNEL); + if (IS_ERR(state)) { + printk(P40_ERR "kzalloc()\n", P40_PARM); + rc = PTR_ERR(state); + goto err_kzalloc; + } + state->common = common; + printk(P40_DIAG "state = 0x%p\n", P40_PARM, state); + + init_waitqueue_head(&state->wait); + + rc = alloc_chrdev_region(&(state->dev_num), P40_DAQ_CDEV_BASEMINOR, P40_DAQ_CDEV_COUNT, P40_DRV_NAME); + if (rc < 0) { + printk(P40_ERR "alloc_chrdev_region()\n", P40_PARM); + goto err_alloc_chrdev_region; + } + regmap_version = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_REGMAP); + printk(P40_INFO "Register map version: 0x%08X\n", P40_PARM, regmap_version); + + fpga_version = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_VERSION); + printk(P40_INFO "FPGA core version: %X.%02X (%04X)\n", P40_PARM, + fpga_version >> 24, (fpga_version >> 16) & 0xFF, fpga_version & 0xFFFF); + + // CTRL endpoint + rc = pcie40_setup_cdev(pcie40_daq_class, &(state->ctrl_cdev), state->dev_num, CTRL_CDEV_MINOR, 1, CTRL_CDEV_NAME, state->common->dev_id, &ctrl_file_ops); + if (rc < 0) { + goto err_dev_ctrl; + } + + // TODO: the streams should be configured only when the corresponding device is opened (so, if we're not using metadata or odin, that memory will not be allocated at all) + + if (mainmibs < 0 || mainmibs > MAIN_BUF_MIBS_MAX) { + mainmibs = MAIN_BUF_MIBS_MAX; + } + + // MAIN stream + state->main_stream.cdev_name = MAIN_CDEV_NAME; + state->main_stream.cdev_minor = MAIN_CDEV_MINOR; + state->main_stream.regs_base = P40_DMA_DAQ_MAIN_STREAM_QSYS_BASE; + state->main_stream.state = state; + rc = dma_stream_configure(state->common->dev_id, &state->main_stream, + P40_DMA_DAQ_MAIN_MAP_QSYS_BASE, MAIN_MAP_MAX_ENTRIES, mainmibs * 1024LL*1024LL); + if (rc < 0) { + goto err_main_configure; + } + + if (metamibs < 0 || metamibs > META_BUF_MIBS_MAX) { + metamibs = META_BUF_MIBS_MAX; + } + + // META stream + state->meta_stream.cdev_name = META_CDEV_NAME; + state->meta_stream.cdev_minor = META_CDEV_MINOR; + state->meta_stream.regs_base = P40_DMA_DAQ_META_STREAM_QSYS_BASE; + state->meta_stream.state = state; + rc = dma_stream_configure(state->common->dev_id, &state->meta_stream, + P40_DMA_DAQ_META_MAP_QSYS_BASE, META_MAP_MAX_ENTRIES, metamibs * 1024LL*1024LL); + if (rc < 0) { + goto err_meta_configure; + } + + // Start in reset mode (the bit auto clears) + pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_RESET, 1 << P40_RST_BIT_DEFAULT); + + state->emu_thread = kthread_run(daq_emu_thread, state, "P40DAQemu%d", common->dev_id); + + common->daq_state = state; + + return rc; + + //dma_stream_destroy(state->common->dev_id, &state->meta_stream); +err_meta_configure: + dma_stream_destroy(state->common->dev_id, &state->main_stream); +err_main_configure: + printk(P40_INFO "remove /dev/pcie40_%d_%s\n", P40_PARM, state->common->dev_id, CTRL_CDEV_NAME); + device_destroy(pcie40_daq_class, MKDEV(MAJOR(state->dev_num), MINOR(state->dev_num)+CTRL_CDEV_MINOR)); +err_dev_ctrl: + unregister_chrdev_region(state->dev_num, P40_DAQ_CDEV_COUNT); +err_alloc_chrdev_region: + kfree(state); +err_kzalloc: + return rc; +} + +void pcie40_daq_emu_remove(struct pcie40_state *common) +{ + struct pcie40_daq_state *state = common->daq_state; + + printk(P40_DIAG "state = 0x%p\n", P40_PARM, state); + + if (!state) { + printk(P40_ERR "no state\n", P40_PARM); + return; + } + + if (state->emu_thread) { + kthread_stop(state->emu_thread); + } + + dma_stream_destroy(state->common->dev_id, &state->meta_stream); + dma_stream_destroy(state->common->dev_id, &state->main_stream); + + printk(P40_INFO "remove /dev/pcie40_%d_%s\n", P40_PARM, state->common->dev_id, CTRL_CDEV_NAME); + device_destroy(pcie40_daq_class, MKDEV(MAJOR(state->dev_num), MINOR(state->dev_num)+CTRL_CDEV_MINOR)); + + unregister_chrdev_region(state->dev_num, P40_DAQ_CDEV_COUNT); + + kfree(state); +}