[Libdlo] [PATCH] udlfb: high-throughput urb pool

Andrew Kephart akephart at akephart.org
Fri Dec 17 12:17:34 PST 2010


udlfb: add parameters for URB pool size and for nonblocking URB pool

For high-throughput usage (e.g. mplayer -vo fbdev), more than 4 URBs may
be needed to keep the pipeline fed.  This patch adds a module parameter
for specifying the initial depth of the URB pool.  This patch also adds
a boolean module parameter that enables a "nowait" mode, wherein no
semaphore is used (only the spinlock is needed for list coherency). 

In my testing with defio mode, I found that the delayed work mechanism
(for releasing the URB semaphore) could not be scheduled quickly enough,
causing unnecessary delays between URB availability and semaphore
release yielding pool exhaustion regardless of initial depth.  By adding
the nowait mode (i.e. removing the semaphore), URBs are made available
immediately upon completion, even if defio is active.  

In cases where you have low bandwidth requirements, but with occasional
small burstiness, the nowait mode has a higher probability of lost pixel
data, but in the higher-bandwidth conditions, the nowait mode actually
has a much lower probability of pixel loss, since the URBs are much more
likely to be available at request time.  

For tuning purposes, this patch also adds URB cache stats to the sysfs
metrics.

We may not want to incorporate this into udlfb as-is, but it marks a
starting point for the throughput discussion....

-andrew

---



diff --git a/udlfb.c b/udlfb.c
index 03ba2d9..8b4c075 100644
--- a/udlfb.c
+++ b/udlfb.c
@@ -67,6 +67,8 @@ MODULE_DEVICE_TABLE(usb, id_table);
 /* module options */
 static int console;   /* Optionally allow fbcon to consume first framebuffer */
 static int fb_defio;  /* Optionally enable experimental fb_defio mmap support */
+static int num_urb=WRITES_IN_FLIGHT;  /* Number of URBs to pre-allocate */
+static int urb_nowait=0; /* Optionally enable nowait mode */
 
 /*
  * When building as a separate module against an arbitrary kernel,
@@ -1001,6 +1003,12 @@ static void dlfb_free(struct kref *kref)
 
        dl_warn("freeing dlfb_data %p\n", dev);
 
+    if (urb_nowait)
+        dl_info("URB cache hits: %d, misses: %d, min_avail: %d\n",
+                atomic_read(&dev->urb_cache_hits),
+                atomic_read(&dev->urb_cache_misses),
+                atomic_read(&dev->urb_cache_min));
+
        kfree(dev);
 }
 
@@ -1454,6 +1462,38 @@ static ssize_t metrics_cpu_kcycles_used_show(struct device *fbdev,
                        atomic_read(&dev->cpu_kcycles_used));
 }
 
+static ssize_t metrics_urb_cache_hits_show(struct device *fbdev,
+                                  struct device_attribute *a, char *buf) {
+       struct fb_info *fb_info = dev_get_drvdata(fbdev);
+       struct dlfb_data *dev = fb_info->par;
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       atomic_read(&dev->urb_cache_hits));
+}
+
+static ssize_t metrics_urb_cache_misses_show(struct device *fbdev,
+                                  struct device_attribute *a, char *buf) {
+       struct fb_info *fb_info = dev_get_drvdata(fbdev);
+       struct dlfb_data *dev = fb_info->par;
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       atomic_read(&dev->urb_cache_misses));
+}
+
+static ssize_t metrics_urb_cache_min_show(struct device *fbdev,
+                                  struct device_attribute *a, char *buf) {
+       struct fb_info *fb_info = dev_get_drvdata(fbdev);
+       struct dlfb_data *dev = fb_info->par;
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       atomic_read(&dev->urb_cache_min));
+}
+
+static ssize_t metrics_urb_cache_busy_show(struct device *fbdev,
+                                  struct device_attribute *a, char *buf) {
+       struct fb_info *fb_info = dev_get_drvdata(fbdev);
+       struct dlfb_data *dev = fb_info->par;
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       num_urb - dev->urbs.available);
+}
+
 static ssize_t edid_show(
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
                        struct file *filp,
@@ -1536,6 +1576,10 @@ static struct device_attribute fb_device_attrs[] = {
        __ATTR_RO(metrics_bytes_sent),
        __ATTR_RO(metrics_cpu_kcycles_used),
        __ATTR(metrics_reset, S_IWUGO, NULL, metrics_reset_store),
+       __ATTR_RO(metrics_urb_cache_hits),
+       __ATTR_RO(metrics_urb_cache_misses),
+       __ATTR_RO(metrics_urb_cache_min),
+       __ATTR_RO(metrics_urb_cache_busy),
 };
 
 /*
@@ -1678,11 +1722,12 @@ static int dlfb_usb_probe(struct usb_interface *interface,
                goto error;
        }
 
-       if (!dlfb_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) {
+       if (!dlfb_alloc_urb_list(dev, num_urb, MAX_TRANSFER)) {
                retval = -ENOMEM;
                dl_err("dlfb_alloc_urb_list failed\n");
                goto error;
        }
+    atomic_set(&dev->urb_cache_min,num_urb);
 
        /* We don't register a new USB class. Our client interface is fbdev */
 
@@ -1860,10 +1905,12 @@ static void dlfb_urb_completion(struct urb *urb)
         * When using fb_defio, we deadlock if up() is called
         * while another is waiting. So queue to another process.
         */
-       if (fb_defio)
-               schedule_delayed_work(&unode->release_urb_work, 0);
-       else
-               up(&dev->urbs.limit_sem);
+    if (!urb_nowait) {
+        if (fb_defio)
+            schedule_delayed_work(&unode->release_urb_work, 0);
+        else
+            up(&dev->urbs.limit_sem);
+    }
 }
 
 static void dlfb_free_urb_list(struct dlfb_data *dev)
@@ -1959,6 +2006,40 @@ static int dlfb_alloc_urb_list(struct dlfb_data *dev, int count, size_t size)
        return i;
 }
 
+static struct urb *dlfb_get_urb_nowait(struct dlfb_data *dev)
+{
+       int ret = 0;
+       struct list_head *entry;
+       struct urb_node *unode;
+       struct urb *urb = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dev->urbs.lock, flags);
+    if (dev->urbs.available) {
+        entry = dev->urbs.list.next;
+        list_del_init(entry);
+        dev->urbs.available--;
+        ret = 1;
+        /* STATS */
+        atomic_inc(&dev->urb_cache_hits);
+        if (dev->urbs.available < atomic_read(&dev->urb_cache_min))
+            atomic_set(&dev->urb_cache_min,dev->urbs.available);
+    }
+       spin_unlock_irqrestore(&dev->urbs.lock, flags);
+
+    if (!ret) {
+        atomic_set(&dev->urb_cache_min,0);
+        atomic_inc(&dev->urb_cache_misses);
+        goto error;
+    }
+
+       unode = list_entry(entry, struct urb_node, entry);
+       urb = unode->urb;
+
+error:
+       return urb;
+}
+
 static struct urb *dlfb_get_urb(struct dlfb_data *dev)
 {
        int ret = 0;
@@ -1967,6 +2048,9 @@ static struct urb *dlfb_get_urb(struct dlfb_data *dev)
        struct urb *urb = NULL;
        unsigned long flags;
 
+    if (urb_nowait) 
+        return dlfb_get_urb_nowait(dev);
+
        /* Wait for an in-flight buffer to complete and get re-queued */
        ret = down_timeout(&dev->urbs.limit_sem, GET_URB_TIMEOUT);
        if (ret) {
@@ -2014,6 +2098,12 @@ MODULE_PARM_DESC(console, "Allow fbcon to consume first framebuffer found");
 module_param(fb_defio, bool, S_IWUSR | S_IRUSR | S_IWGRP | S_IRGRP);
 MODULE_PARM_DESC(fb_defio, "Enable fb_defio mmap support. *Experimental*");
 
+module_param(urb_nowait, bool, S_IWUSR | S_IRUSR | S_IWGRP | S_IRGRP);
+MODULE_PARM_DESC(urb_nowait, "Enable nowait mode for urb list. *Experimental*");
+
+module_param(num_urb, int, S_IWUSR | S_IRUSR | S_IWGRP | S_IRGRP);
+MODULE_PARM_DESC(num_urb, "Number of URBs to initially allocate.");
+
 MODULE_AUTHOR("Roberto De Ioris <roberto at unbit.it>, "
              "Jaya Kumar <jayakumar.lkml at gmail.com>, "
              "Bernie Thompson <bernie at plugable.com>");
diff --git a/udlfb.h b/udlfb.h
index f13904c..388e773 100644
--- a/udlfb.h
+++ b/udlfb.h
@@ -55,6 +55,10 @@ struct dlfb_data {
        atomic_t bytes_identical; /* saved effort with backbuffer comparison */
        atomic_t bytes_sent; /* to usb, after compression including overhead */
        atomic_t cpu_kcycles_used; /* transpired during pixel processing */
+    /* URB cache metrics, exposed through sysfs */
+    atomic_t urb_cache_hits; /* Times URB was available in no-wait mode */
+    atomic_t urb_cache_misses; /* Times URB was unavailable in no-wait mode  */
+    atomic_t urb_cache_min;  /* Lowest level of URB cache */
 };
 
 #define NR_USB_REQUEST_I2C_SUB_IO 0x02
--



More information about the Libdlo mailing list