BLASTing Linux Code

Jan Tobias Mühlberg and Gerald Lüttgen
Department of Computer Science, University of York, York YO10 5DD, U.K.


main page | next example


Commit Overview | Files | Comments

Checking Memory Safety: Example 5


Commit Overview

Commit Key abd559b1052e28d8b9c28aabde241f18fa89090b
Subject [PATCH] sbp2: fix deadlocks and delays on device removal/rmmod
Description Fixes for deadlocks of the ieee1394 and scsi subsystems and long delays in futile error recovery attempts when SBP-2 devices are removed or drivers are unloaded.
- Complete commands quickly with DID_NO_CONNECT if the 1394 node is gone or if the 1394 low-level driver was unloaded.
- Skip unnecessary work in the eh_abort_handler and eh_device_reset_handler if the node or 1394 low-level driver is gone.
- Let scsi's high-level shut down gracefully when sbp2 is being unloaded or detached from the 1394 unit. A call to scsi_remove_device is added for this purpose, which requires us to store a scsi_device pointer.
- scsi_device pointer is obtained from slave_alloc hook and cleared by slave_destroy. This avoids usage of the pointer after the scsi device was deleted e.g. by the user via scsi_mod's sysfs interface.
Requires Linux 2.6.14 kernel source as from git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/linux-2.6.14.y.git

--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -596,6 +596,14 @@ static void sbp2util_mark_command_comple
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
}
+/*
+ * Is scsi_id valid? Is the 1394 node still present?
+ */
+static inline int sbp2util_node_is_available(struct scsi_id_instance_data *scsi_id)
+{
+ return scsi_id && scsi_id->ne && !scsi_id->ne->in_limbo;
+}
+
/*********************************************
@@ -631,11 +639,23 @@ static int sbp2_remove(struct device *de
{
struct unit_directory *ud;
struct scsi_id_instance_data *scsi_id;
+ struct scsi_device *sdev;
SBP2_DEBUG("sbp2_remove");
ud = container_of(dev, struct unit_directory, device);
scsi_id = ud->device.driver_data;
+ if (!scsi_id)
+ return 0;
+
+ /* Trigger shutdown functions in scsi's highlevel. */
+ if (scsi_id->scsi_host)
+ scsi_unblock_requests(scsi_id->scsi_host);
+ sdev = scsi_id->sdev;
+ if (sdev) {
+ scsi_id->sdev = NULL;
+ scsi_remove_device(sdev);
+ }
sbp2_logout_device(scsi_id);
sbp2_remove_device(scsi_id);
@@ -2473,37 +2493,26 @@ static int sbp2scsi_queuecommand(struct
struct scsi_id_instance_data *scsi_id =
(struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0];
struct sbp2scsi_host_info *hi;
+ int result = DID_NO_CONNECT << 16;
SBP2_DEBUG("sbp2scsi_queuecommand");
- /*
- * If scsi_id is null, it means there is no device in this slot,
- * so we should return selection timeout.
- */
- if (!scsi_id) {
- SCpnt->result = DID_NO_CONNECT << 16;
- done (SCpnt);
- return 0;
- }
+ if (!sbp2util_node_is_available(scsi_id))
+ goto done;
hi = scsi_id->hi;
if (!hi) {
SBP2_ERR("sbp2scsi_host_info is NULL - this is bad!");
- SCpnt->result = DID_NO_CONNECT << 16;
- done (SCpnt);
- return(0);
+ goto done;
}
/*
* Until we handle multiple luns, just return selection time-out
* to any IO directed at non-zero LUNs
*/
- if (SCpnt->device->lun) {
- SCpnt->result = DID_NO_CONNECT << 16;
- done (SCpnt);
- return(0);
- }
+ if (SCpnt->device->lun)
+ goto done;
/*
* Check for request sense command, and handle it here
@@ -2514,7 +2523,7 @@ static int sbp2scsi_queuecommand(struct
memcpy(SCpnt->request_buffer, SCpnt->sense_buffer, SCpnt->request_bufflen);
memset(SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
sbp2scsi_complete_command(scsi_id, SBP2_SCSI_STATUS_GOOD, SCpnt, done);
- return(0);
+ return 0;
}
/*
@@ -2522,9 +2531,8 @@ static int sbp2scsi_queuecommand(struct
*/
if (!hpsb_node_entry_valid(scsi_id->ne)) {
SBP2_ERR("Bus reset in progress - rejecting command");
- SCpnt->result = DID_BUS_BUSY << 16;
- done (SCpnt);
- return(0);
+ result = DID_BUS_BUSY << 16;
+ goto done;
}
/*
@@ -2535,8 +2543,12 @@ static int sbp2scsi_queuecommand(struct
sbp2scsi_complete_command(scsi_id, SBP2_SCSI_STATUS_SELECTION_TIMEOUT,
SCpnt, done);
}
+ return 0;
- return(0);
+done:
+ SCpnt->result = result;
+ done(SCpnt);
+ return 0;
}
/*
@@ -2683,14 +2695,27 @@ static void sbp2scsi_complete_command(st
}
-static int sbp2scsi_slave_configure (struct scsi_device *sdev)
+static int sbp2scsi_slave_alloc(struct scsi_device *sdev)
{
- blk_queue_dma_alignment(sdev->request_queue, (512 - 1));
+ ((struct scsi_id_instance_data *)sdev->host->hostdata[0])->sdev = sdev;
+ return 0;
+}
+
+static int sbp2scsi_slave_configure(struct scsi_device *sdev)
+{
+ blk_queue_dma_alignment(sdev->request_queue, (512 - 1));
return 0;
}
+static void sbp2scsi_slave_destroy(struct scsi_device *sdev)
+{
+ ((struct scsi_id_instance_data *)sdev->host->hostdata[0])->sdev = NULL;
+ return;
+}
+
+
/*
* Called by scsi stack when something has really gone wrong. Usually
* called when a command has timed-out for some reason.
@@ -2705,7 +2730,7 @@ static int sbp2scsi_abort(struct scsi_cm
SBP2_ERR("aborting sbp2 command");
scsi_print_command(SCpnt);
- if (scsi_id) {
+ if (sbp2util_node_is_available(scsi_id)) {
/*
* Right now, just return any matching command structures
@@ -2742,31 +2767,24 @@ static int sbp2scsi_abort(struct scsi_cm
/*
* Called by scsi stack when something has really gone wrong.
*/
-static int __sbp2scsi_reset(struct scsi_cmnd *SCpnt)
+static int sbp2scsi_reset(struct scsi_cmnd *SCpnt)
{
struct scsi_id_instance_data *scsi_id =
(struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0];
+ unsigned long flags;
SBP2_ERR("reset requested");
- if (scsi_id) {
+ spin_lock_irqsave(SCpnt->device->host->host_lock, flags);
+
+ if (sbp2util_node_is_available(scsi_id)) {
SBP2_ERR("Generating sbp2 fetch agent reset");
sbp2_agent_reset(scsi_id, 0);
}
- return(SUCCESS);
-}
-
-static int sbp2scsi_reset(struct scsi_cmnd *SCpnt)
-{
- unsigned long flags;
- int rc;
-
- spin_lock_irqsave(SCpnt->device->host->host_lock, flags);
- rc = __sbp2scsi_reset(SCpnt);
spin_unlock_irqrestore(SCpnt->device->host->host_lock, flags);
- return rc;
+ return SUCCESS;
}
static const char *sbp2scsi_info (struct Scsi_Host *host)
@@ -2817,7 +2835,9 @@ static struct scsi_host_template scsi_dr
.eh_device_reset_handler = sbp2scsi_reset,
.eh_bus_reset_handler = sbp2scsi_reset,
.eh_host_reset_handler = sbp2scsi_reset,
+ .slave_alloc = sbp2scsi_slave_alloc,
.slave_configure = sbp2scsi_slave_configure,
+ .slave_destroy = sbp2scsi_slave_destroy,
.this_id = -1,
.sg_tablesize = SG_ALL,
.use_clustering = ENABLE_CLUSTERING,

(purple: line numbers and function names; red: line removed; green: line added)

Files

Unmodified sources

Comments

Regarding memory safety, the interesting error in this example is related to the use of the scsi_device pointer, which might be used after removing the related device and freeing the pointer. This can happen due to concurrent interaction of a secondary kernel thread. Since it does not seem to be possible to force BLAST to consider this behaviour, it was not possible to detect the error.

Source: http://www.kernel.org



Jan Tobias Mühlberg, $Date$