diff --git a/Driver/pcie40_driver/daq_emu.c b/Driver/pcie40_driver/daq_emu.c
new file mode 100644
index 0000000000000000000000000000000000000000..ea5722f81dda7e81ecae182faeae8e30a2789c46
--- /dev/null
+++ b/Driver/pcie40_driver/daq_emu.c
@@ -0,0 +1,494 @@
+//p40emu``+
+#define P40_FMT "P40DAQemu:%s(): "
+#define PCIE40_DAQ_CLASS "lhcb_pcie40_daq_emu"
+#define PCIE40_EMU
+
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+
+struct pcie40_dma_map;
+static int dma_map_emu_alloc(struct pcie40_dma_map* map, void *base, size_t max_entries, size_t desired_size);
+static int dma_map_emu_free(struct pcie40_dma_map* map);
+
+#include "daq.h"
+
+extern int mpf;
+
+static int dma_map_emu_alloc(struct pcie40_dma_map* map, void *base, size_t max_entries, size_t desired_size)
+{
+  int i;
+
+  map->base = base;
+  map->max_entries = max_entries;
+  map->num_entries = 0;
+  map->size = 0;
+  map->entries = kzalloc(sizeof(struct pcie40_dma_buffer)*max_entries, GFP_KERNEL);
+
+  printk(P40_DIAG "entries = 0x%p\n", P40_PARM, map->entries);
+
+  for (i = 0; i < map->max_entries; ++i) {
+    size_t alloc_size = desired_size - map->size > PCI_MAX_ALLOC
+                        ? PCI_MAX_ALLOC
+                        : desired_size - map->size;
+    while (1) {
+      struct pcie40_dma_buffer* buffer = map->entries + i;
+      buffer->size = alloc_size;
+      buffer->ptr = kzalloc(buffer->size, GFP_KERNEL);
+      //printk("...%zu", buffer->size);
+      buffer->start = virt_to_phys(buffer->ptr);
+      if (buffer->ptr == NULL) {
+        if (buffer->size < 1024*1024) {
+          printk("...NOT ENOUGH MEMORY!\n");
+          map->num_entries = i;
+          dma_map_emu_free(map);
+          return -1;
+        }
+        alloc_size /= 2;
+      } else {
+        map->size += buffer->size;
+        break;
+      }
+    }
+
+    if (map->size >= desired_size) {
+      ++i;
+      break;
+    }
+  }
+  map->num_entries = i;
+
+  return 0;
+}
+
+static int dma_map_emu_free(struct pcie40_dma_map* map)
+{
+  int i;
+  for (i = 0; i < map->num_entries; ++i) {
+    struct pcie40_dma_buffer* buffer = map->entries + i;
+    kfree(buffer->ptr);
+  }
+  kfree(map->entries);
+  map->entries = NULL;
+
+  return 0;
+}
+
//+`dma_stream_set_write_off` Update the cached write offset and mirror it to the emulated stream register.
//>`stream` Stream whose write offset is updated.
//>`write_off` New write offset, in bytes, relative to the start of the stream's host buffer.
static void dma_stream_set_write_off(struct pcie40_dma_stream *stream, uint32_t write_off)
{
  //TODO: spin lock to serialize access!
  // (NOTE(review): the only caller in this file, dma_stream_emu_write, already holds
  //  stream->off_lock around this call — confirm other call sites before adding locking here)
  stream->write_off = write_off;  //cached copy, read back via dma_stream_get_write_off()
  pcie40_write32_stream(stream, P40_DMA_DAQ_STREAM_OFF_HOST_BUF_WRITE_OFF, write_off);
}
+
+//+`dma_stream_emu_write` Emulate a dma write into a given stream.
+static ssize_t dma_stream_emu_write(
+  struct pcie40_dma_stream *stream, int *map_idx, uint32_t *buf_off, const void *from, size_t bytes)//;?>
+//>`stream` Stream to write to.
+//><`map_idx` Index of current DMA buffer, must be initialized to 0.
+//><`buf_off` Offset within current DMA buffer, must be initialized to 0.
+//>`from` Pointer to data to write, if NULL, zeroes will be written instead.
+//>`bytes` Number of bytes to write.
+{
+  uint32_t write_off, write_off_next;
+  ssize_t bytes_written = 0;
+  //? The write is started only if sufficient space is available.
+  size_t bytes_free = dma_stream_get_bytes_free(stream);
+  if (bytes_free <= bytes) {
+    //? If not, a negative value is immediately returned. Its magnitude is the number of bytes missing.
+    return bytes_free - bytes;
+  }
+  // if (write_off + 32 == (read_off & ~0x1F))
+
+  //? A write is split across multiple DMA buffers if the current one does not have sufficient space available.
+  while (bytes_written < bytes) {
+    struct pcie40_dma_buffer *buf = stream->map.entries + *map_idx;
+    size_t bytes_left = bytes - bytes_written;
+    if (*buf_off + bytes_left <= buf->size) {
+      if (from) {
+        memcpy(buf->ptr + *buf_off, from + bytes_written, bytes_left);
+      } else {
+        memset(buf->ptr + *buf_off, 0, bytes_left);
+      }
+      bytes_written += bytes_left;
+      *buf_off += bytes_left;
+    } else {
+      size_t bytes_avail = buf->size - *buf_off;
+      if (from) {
+        memcpy(buf->ptr + *buf_off, from + bytes_written, bytes_avail);
+      } else {
+        memset(buf->ptr + *buf_off, 0, bytes_avail);
+      }
+      *map_idx = (*map_idx + 1) % stream->map.num_entries;
+      bytes_written += bytes_avail;
+      *buf_off = 0;
+    }
+  }
+  spin_lock_irqsave(&stream->off_lock, stream->off_flags);
+  write_off = dma_stream_get_write_off(stream);
+  write_off_next = (write_off + bytes) % stream->map.size;
+  dma_stream_set_write_off(stream, write_off_next);
+  spin_unlock_irqrestore(&stream->off_lock, stream->off_flags);
+  return bytes;
+}
+
+//+`daq_emu_thread` Thread loop generating emulated board data.
+//>`data` Opaque pointer to interface state
+static int daq_emu_thread(void *data) //?>
+{
+  struct pcie40_daq_state *state = data;
+  uint64_t evid_frg = 0;
+  int main_buf = 0;
+  uint32_t main_off = 0;
+  int meta_buf = 0;
+  uint32_t meta_off = 0;
+  int meta_idx = 0;
+  ssize_t write_status = 0;
+  enum { GEN_MHDR, GEN_FRG, GEN_META, GEN_MPAD } gen_state = GEN_MHDR;
+
+  //? One instance of this thread is spawned for each emulated interface.
+  printk(P40_INFO "starting emulator thread for interface %d\n",
+   P40_PARM, state->common->dev_id);
+
+  pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_META_PACKING, mpf);
+
+  //? The thread loops until the module is unloaded. <?
+  while (!kthread_should_stop()) { //... } ?>
+    uint32_t mmr_reset, mmr_main_gen_ctl, mmr_main_gen_fixed, mmr_main_raw_mode, mmr_main_enable, mmr_meta_enable;
+
+    struct pcie40_dma_buffer *main_buffer = state->main_stream.map.entries + main_buf;
+    struct pcie40_dma_buffer *meta_buffer = state->meta_stream.map.entries + meta_buf;
+
+    //? Every loop iteration reads the emulated control registers <?
+    mmr_reset = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_RESET);
+    mmr_main_gen_ctl = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL);
+    mmr_main_gen_fixed = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_FIXED);
+    mmr_main_raw_mode = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_RAW_MODE);
+    mmr_main_enable = pcie40_read32_stream(&state->main_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE);
+    mmr_meta_enable = pcie40_read32_stream(&state->meta_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE);
+    mpf = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_META_PACKING);
+    //?>Then uses these values to drive its behaviour for the rest of the current emulation cycle.
+
+    if (mmr_reset & (1 << P40_RST_BIT_DEFAULT)) {
+      printk(P40_INFO "%d: reset default\n", P40_PARM, state->common->dev_id);
+
+      mmr_main_gen_ctl &= ~(1 << P40_MAIN_GEN_BIT_ENABLE);
+      mmr_main_gen_ctl &= ~(1 << P40_MAIN_GEN_BIT_FIXED);
+      pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL, mmr_main_gen_ctl);
+
+      mmr_main_gen_fixed = 0x6243484C;
+      pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL, mmr_main_gen_fixed);
+
+      mmr_main_enable = 1;
+      pcie40_write32_stream(&state->main_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE, mmr_main_enable);
+      mmr_meta_enable = 0;
+      pcie40_write32_stream(&state->meta_stream, P40_DMA_DAQ_STREAM_OFF_ENABLE, mmr_meta_enable);
+
+      mmr_reset |= (1 << P40_RST_BIT_LOGIC);
+    }
+    if (mmr_reset & (1 << P40_RST_BIT_LOGIC)) {
+      printk(P40_INFO "%d: reset logic\n", P40_PARM, state->common->dev_id);
+
+      mmr_main_gen_ctl &= ~(1 << P40_MAIN_GEN_BIT_RUNNING);
+      pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_GEN_CTL, mmr_main_gen_ctl);
+
+      dma_stream_set_read_off(&state->main_stream, 0);
+      dma_stream_set_write_off(&state->main_stream, 0);
+
+      dma_stream_set_read_off(&state->meta_stream, 0);
+      dma_stream_set_write_off(&state->meta_stream, 0);
+
+      gen_state = GEN_MHDR;
+    }
+    if (mmr_reset & (1 << P40_RST_BIT_FLUSH)) {
+      printk(P40_INFO "%d: reset flush\n", P40_PARM, state->common->dev_id);
+    }
+    pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_RESET, 0);
+
+retry:
+  //Jump to this point to retry the last operation without re-reading the registers
+  // (useful in the middle of flushing, once the reset register has been zeroed but we're not done flushing yet)
+
+    if (!(mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_ENABLE))) {
+      evid_frg = 0;
+      main_buf = 0;
+      main_off = 0;
+      meta_buf = 0;
+      meta_off = 0;
+      meta_idx = 0;
+      gen_state = GEN_MHDR;
+    }
+
+    if ((mmr_main_enable == 1) && (mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_ENABLE))) {
+
+      struct __attribute__((__packed__)) {
+        uint64_t evid;
+        uint32_t ghdr;
+        uint8_t  data[128 - sizeof(uint64_t) - sizeof(uint32_t)]; //Must be aligned to 32 bytes
+      } frg_nometa;
+
+      struct __attribute__((__packed__)) {
+        char magic[6];
+        uint16_t frags;
+        uint64_t evid;
+        uint64_t offset;
+      } meta_hdr;
+
+      struct __attribute__((__packed__)) {
+        uint32_t ghdr;
+        uint8_t  data[104 - sizeof(uint32_t)]; //Must be aligned to 8 bytes
+      } frg;
+
+      uint16_t meta;
+      size_t meta_size = sizeof(meta_hdr) + mpf * sizeof(meta);
+      size_t meta_left = (mpf - meta_idx) * sizeof(meta);
+      size_t meta_pad = meta_size % 8 ? (meta_size - meta_size % 8) : 0; //Metadata is also aligned to 8 bytes
+
+      if (mmr_reset & (1 << P40_RST_BIT_FLUSH)) {
+        if (mmr_meta_enable) {
+            switch (gen_state) {
+              case GEN_MHDR:
+                // Just generate a dummy header with an evid of 0xFF...
+                evid_frg = 0;
+                meta_idx = 0;
+                memcpy(meta_hdr.magic, "META40", sizeof(meta_hdr.magic));
+                meta_hdr.evid = 0xFFFFFFFFFFFFFFFF;
+                meta_hdr.offset = main_off;
+                write_status = dma_stream_emu_write(&state->meta_stream,
+                 &meta_buf, &meta_off, &meta_hdr, sizeof(meta_hdr));
+                if (write_status < 0) {
+                  msleep_interruptible(1);
+                  goto retry;
+                }
+                break;
+              case GEN_FRG:
+                //Before the flush we were supposed to emit a fragment, now we aren't anymore
+                //just fallthrough to the next case
+              case GEN_META:
+                //Also nothing to do here, no new fragments to create metadata for
+                //just fallthrough to the next case
+              case GEN_MPAD:
+                write_status = dma_stream_emu_write(&state->meta_stream,
+                 &meta_buf, &meta_off, NULL, meta_left + meta_pad);
+                if (write_status >= 0) {
+                  msleep_interruptible(1);
+                  goto retry;
+                }
+                break;
+            }
+        } else {
+          frg_nometa.evid = 0xFFFFFFFFFFFFFFFF;
+          frg_nometa.ghdr = ((100 * 8) & 0xFFFFF) | ((evid_frg & 0xFFF) << 20); //Make them 100 bytes
+          memset(frg_nometa.data, 0, sizeof(frg_nometa.data));
+          write_status = dma_stream_emu_write(&state->main_stream,
+           &main_buf, &main_off, &frg_nometa, sizeof(frg_nometa));
+          if (write_status > 0) {
+            evid_frg = 0;
+          }
+        }
+      } else //Not flushing
+       if (mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_RUNNING)) {
+        if (mmr_main_gen_ctl & (1 << P40_MAIN_GEN_BIT_FIXED)) {
+        // Generate fixed pattern
+          uint32_t pattern[4];
+          mmr_main_raw_mode = 1;
+          pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_RAW_MODE, mmr_main_raw_mode);
+          //Because the FPGA writes payloads in BE
+          pattern[0] = pattern[1] = pattern[2] = pattern[3] = cpu_to_be32(mmr_main_gen_fixed);
+
+          write_status = dma_stream_emu_write(&state->main_stream,
+           &main_buf, &main_off, &pattern, sizeof(pattern));
+        } else {
+        // Generate fragments
+          mmr_main_raw_mode = 0;
+          pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_MAIN_RAW_MODE, mmr_main_raw_mode);
+
+          // If the metadata stream is enabled, use it for metadata
+          if (mmr_meta_enable) {
+            switch (gen_state) {
+              case GEN_MHDR:
+                meta_idx = 0;
+                memcpy(meta_hdr.magic, "META40", sizeof(meta_hdr.magic));
+                meta_hdr.frags = mpf;
+                meta_hdr.evid = evid_frg;
+                meta_hdr.offset = main_off;
+                write_status = dma_stream_emu_write(&state->meta_stream,
+                 &meta_buf, &meta_off, &meta_hdr, sizeof(meta_hdr));
+                if (write_status > 0) {
+                  gen_state = GEN_FRG;
+                }
+                break;
+              case GEN_FRG:
+                frg.ghdr = ((100 * 8) & 0xFFFFF) | ((evid_frg & 0xFFF) << 20); //Make them 100 bytes
+                memset(frg.data, (uint8_t)evid_frg, sizeof(frg.data));
+                write_status = dma_stream_emu_write(&state->main_stream,
+                 &main_buf, &main_off, &frg, sizeof(frg));
+                if (write_status > 0) {
+                  ++meta_idx;
+                  gen_state = GEN_META;
+                }
+                break;
+              case GEN_META:
+                meta = (frg.ghdr & 0xFFFFF) / 8; //Turn into bytes and pad to 8
+                if (meta % 8) meta += 8 - meta % 8;
+                write_status = dma_stream_emu_write(&state->meta_stream,
+                 &meta_buf, &meta_off, &meta, sizeof(meta));
+                if (write_status > 0) {
+                  if (meta_idx < mpf) {
+                    gen_state = GEN_FRG;
+                  } else {
+                    gen_state = GEN_MPAD;
+                  }
+                }
+                break;
+              case GEN_MPAD:
+                write_status = dma_stream_emu_write(&state->meta_stream,
+                 &meta_buf, &meta_off, NULL, meta_pad);
+                if (write_status >= 0) {
+                  gen_state = GEN_MHDR;
+                }
+                break;
+            }
+          } else {
+            // Otherwise generate only fragments
+            frg_nometa.evid = evid_frg;
+            frg_nometa.ghdr = ((100 * 8) & 0xFFFFF) | ((evid_frg & 0xFFF) << 20); //Make them 100 bytes
+            memset(frg_nometa.data, (uint8_t)evid_frg, sizeof(frg_nometa.data));
+            write_status = dma_stream_emu_write(&state->main_stream,
+             &main_buf, &main_off, &frg_nometa, sizeof(frg_nometa));
+            if (write_status > 0) {
+              ++evid_frg;
+            }
+          }
+        }
+      }
+    }
+    //XXX: Apparently we soft-lockup the kernel if we do not yield at least from time to time?
+    if (write_status < 0) {
+      msleep_interruptible(1);
+    } else {
+      yield();
+    }
+  }
+
+  printk(P40_INFO "stopping emulator thread for interface %d\n",
+   P40_PARM, state->common->dev_id);
+  return 0;
+}
+
+int pcie40_daq_emu_probe(struct pcie40_state *common)
+{
+  int rc = 0;
+
+  struct pcie40_daq_state *state = NULL;
+  uint32_t regmap_version, fpga_version;
+
+  state = kzalloc(sizeof(struct pcie40_daq_state), GFP_KERNEL);
+  if (IS_ERR(state)) {
+    printk(P40_ERR "kzalloc()\n", P40_PARM);
+    rc = PTR_ERR(state);
+    goto err_kzalloc;
+  }
+  state->common = common;
+  printk(P40_DIAG "state = 0x%p\n", P40_PARM, state);
+
+  init_waitqueue_head(&state->wait);
+
+  rc = alloc_chrdev_region(&(state->dev_num), P40_DAQ_CDEV_BASEMINOR, P40_DAQ_CDEV_COUNT, P40_DRV_NAME);
+  if (rc < 0) {
+    printk(P40_ERR "alloc_chrdev_region()\n", P40_PARM);
+    goto err_alloc_chrdev_region;
+  }
+  regmap_version = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_REGMAP);
+  printk(P40_INFO "Register map version: 0x%08X\n", P40_PARM, regmap_version);
+
+  fpga_version = pcie40_read32_ctrl(state->common, P40_DMA_CTRL_OFF_VERSION);
+  printk(P40_INFO "FPGA core version: %X.%02X (%04X)\n", P40_PARM,
+   fpga_version >> 24, (fpga_version >> 16) & 0xFF, fpga_version & 0xFFFF);
+
+  // CTRL endpoint
+  rc = pcie40_setup_cdev(pcie40_daq_class, &(state->ctrl_cdev), state->dev_num, CTRL_CDEV_MINOR, 1, CTRL_CDEV_NAME, state->common->dev_id, &ctrl_file_ops);
+  if (rc < 0) {
+    goto err_dev_ctrl;
+  }
+
+  // TODO: the streams should be configured only when the corresponding device is opened (so, if we're not using metadata or odin, that memory will not be allocated at all)
+
+  if (mainmibs < 0 || mainmibs > MAIN_BUF_MIBS_MAX) {
+    mainmibs = MAIN_BUF_MIBS_MAX;
+  }
+
+  // MAIN stream
+  state->main_stream.cdev_name = MAIN_CDEV_NAME;
+  state->main_stream.cdev_minor = MAIN_CDEV_MINOR;
+  state->main_stream.regs_base = P40_DMA_DAQ_MAIN_STREAM_QSYS_BASE;
+  state->main_stream.state = state;
+  rc = dma_stream_configure(state->common->dev_id, &state->main_stream,
+   P40_DMA_DAQ_MAIN_MAP_QSYS_BASE, MAIN_MAP_MAX_ENTRIES, mainmibs * 1024LL*1024LL);
+  if (rc < 0) {
+    goto err_main_configure;
+  }
+
+  if (metamibs < 0 || metamibs > META_BUF_MIBS_MAX) {
+    metamibs = META_BUF_MIBS_MAX;
+  }
+
+  // META stream
+  state->meta_stream.cdev_name = META_CDEV_NAME;
+  state->meta_stream.cdev_minor = META_CDEV_MINOR;
+  state->meta_stream.regs_base = P40_DMA_DAQ_META_STREAM_QSYS_BASE;
+  state->meta_stream.state = state;
+  rc = dma_stream_configure(state->common->dev_id, &state->meta_stream,
+   P40_DMA_DAQ_META_MAP_QSYS_BASE, META_MAP_MAX_ENTRIES, metamibs * 1024LL*1024LL);
+  if (rc < 0) {
+    goto err_meta_configure;
+  }
+
+  // Start in reset mode (the bit auto clears)
+  pcie40_write32_ctrl(state->common, P40_DMA_CTRL_OFF_RESET, 1 << P40_RST_BIT_DEFAULT);
+
+  state->emu_thread = kthread_run(daq_emu_thread, state, "P40DAQemu%d", common->dev_id);
+
+  common->daq_state = state;
+
+  return rc;
+
+  //dma_stream_destroy(state->common->dev_id, &state->meta_stream);
+err_meta_configure:
+  dma_stream_destroy(state->common->dev_id, &state->main_stream);
+err_main_configure:
+  printk(P40_INFO "remove /dev/pcie40_%d_%s\n", P40_PARM, state->common->dev_id, CTRL_CDEV_NAME);
+  device_destroy(pcie40_daq_class, MKDEV(MAJOR(state->dev_num), MINOR(state->dev_num)+CTRL_CDEV_MINOR));
+err_dev_ctrl:
+  unregister_chrdev_region(state->dev_num, P40_DAQ_CDEV_COUNT);
+err_alloc_chrdev_region:
+  kfree(state);
+err_kzalloc:
+  return rc;
+}
+
+void pcie40_daq_emu_remove(struct pcie40_state *common)
+{
+  struct pcie40_daq_state *state = common->daq_state;
+
+  printk(P40_DIAG "state = 0x%p\n", P40_PARM, state);
+
+  if (!state) {
+    printk(P40_ERR "no state\n", P40_PARM);
+    return;
+  }
+
+  if (state->emu_thread) {
+    kthread_stop(state->emu_thread);
+  }
+
+  dma_stream_destroy(state->common->dev_id, &state->meta_stream);
+  dma_stream_destroy(state->common->dev_id, &state->main_stream);
+
+  printk(P40_INFO "remove /dev/pcie40_%d_%s\n", P40_PARM, state->common->dev_id, CTRL_CDEV_NAME);
+  device_destroy(pcie40_daq_class, MKDEV(MAJOR(state->dev_num), MINOR(state->dev_num)+CTRL_CDEV_MINOR));
+
+  unregister_chrdev_region(state->dev_num, P40_DAQ_CDEV_COUNT);
+
+  kfree(state);
+}