Skip to content

Commit a7b948b

Browse files
Srivastava, Piyush
authored and committed
bugfix/CSTACKEX-147: Deletion of non last VM on any host still shows the deleted lun as attached devices on host
1 parent a6e4b49 commit a7b948b

1 file changed

Lines changed: 72 additions & 3 deletions

File tree

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/IscsiAdmStorageAdaptor.java

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,23 +350,92 @@ private boolean hasOtherActiveLuns(String host, int port, String iqn, String lun
350350
}
351351
for (java.io.File entry : entries) {
352352
String name = entry.getName();
353-
if (name.startsWith(prefix) && !name.equals(prefix + lun)) {
353+
// Skip partition entries (e.g. lun-0-part1, lun-0-part2) — these are not
354+
// independent LUNs, they are partition symlinks for the same LUN disk.
355+
// Only count actual LUN entries (no "-part" suffix after the lun number).
356+
if (name.startsWith(prefix) && !name.equals(prefix + lun) && !name.contains("-part")) {
354357
logger.debug("Found other active LUN on same target: " + name);
355358
return true;
356359
}
357360
}
358361
return false;
359362
}
360363

364+
/**
365+
* Removes a single stale SCSI device from the kernel using the sysfs interface.
366+
*
367+
* When ONTAP unmaps a LUN from the host's igroup, the by-path symlink and the
368+
* underlying SCSI device (/dev/sdX) remain present in the kernel until explicitly
369+
* removed — the kernel does not auto-remove devices from live iSCSI sessions.
370+
*
371+
* This method resolves the by-path symlink to the real block device name (e.g. sdd),
372+
* then writes "1" to /sys/block/<dev>/device/delete — the standard Linux kernel SCSI
373+
* API for removing a single device without tearing down the entire iSCSI session.
374+
* Once the kernel processes the delete, it also removes the by-path symlink.
375+
*
376+
* This is used instead of iscsiadm --logout when other LUNs on the same IQN are still
377+
* active (ONTAP single-IQN-per-SVM model), since logout would tear down ALL LUNs.
378+
*/
379+
private void removeStaleScsiDevice(String host, int port, String iqn, String lun) {
380+
String byPath = getByPath(host, port, "/" + iqn + "/" + lun);
381+
java.nio.file.Path byPathLink = java.nio.file.Paths.get(byPath);
382+
if (!java.nio.file.Files.exists(byPathLink)) {
383+
logger.debug("by-path entry for LUN " + lun + " already gone, nothing to remove");
384+
return;
385+
}
386+
try {
387+
java.nio.file.Path realDevice = byPathLink.toRealPath();
388+
String devName = realDevice.getFileName().toString();
389+
java.io.File deleteFile = new java.io.File("/sys/block/" + devName + "/device/delete");
390+
if (!deleteFile.exists()) {
391+
logger.warn("sysfs delete entry not found for device " + devName + " — cannot remove stale SCSI device");
392+
return;
393+
}
394+
try (java.io.FileWriter fw = new java.io.FileWriter(deleteFile)) {
395+
fw.write("1");
396+
}
397+
logger.info("Removed stale SCSI device " + devName + " for LUN /" + iqn + "/" + lun + " via sysfs");
398+
399+
// Also remove any partition by-path symlinks for this LUN (e.g. lun-0-part1, lun-0-part2).
400+
// Writing "1" to the parent device's sysfs delete removes the disk and all its partitions
401+
// from the kernel, but the by-path symlinks for partitions can remain as dangling symlinks.
402+
// Clean them up explicitly.
403+
String partPrefix = byPath + "-part";
404+
java.io.File byPathDir = new java.io.File("/dev/disk/by-path");
405+
java.io.File[] entries = byPathDir.listFiles();
406+
if (entries != null) {
407+
for (java.io.File entry : entries) {
408+
if (entry.getAbsolutePath().startsWith(partPrefix)) {
409+
try {
410+
java.nio.file.Files.deleteIfExists(entry.toPath());
411+
logger.info("Removed stale partition symlink: " + entry.getName());
412+
} catch (Exception e) {
413+
logger.warn("Failed to remove partition symlink " + entry.getName() + ": " + e.getMessage());
414+
}
415+
}
416+
}
417+
}
418+
} catch (Exception e) {
419+
logger.warn("Failed to remove stale SCSI device for LUN /" + iqn + "/" + lun + ": " + e.getMessage());
420+
}
421+
}
422+
361423
private boolean disconnectPhysicalDisk(String host, int port, String iqn, String lun) {
362424
// Check if other LUNs on the same IQN target are still in use.
363425
// ONTAP (and similar) uses a single IQN per SVM with multiple LUNs.
364426
// Doing iscsiadm --logout tears down the ENTIRE target session,
365427
// which would destroy access to ALL LUNs — not just the one being disconnected.
366428
if (hasOtherActiveLuns(host, port, iqn, lun)) {
367429
logger.info("Skipping iSCSI logout for /" + iqn + "/" + lun +
368-
" — other LUNs on the same target are still active");
369-
return true;
430+
" — other LUNs on the same target are still active. Removing stale SCSI device for this LUN only.");
431+
removeStaleScsiDevice(host, port, iqn, lun);
432+
// After removing this LUN's device, re-check: if no other LUNs remain active,
433+
// we are the last one and must logout to clean up the iSCSI session entirely.
434+
if (hasOtherActiveLuns(host, port, iqn, lun)) {
435+
logger.info("Other LUNs still active after removing /" + iqn + "/" + lun + " — session kept alive.");
436+
return true;
437+
}
438+
logger.info("No more active LUNs on target after removing /" + iqn + "/" + lun + " — proceeding with iSCSI logout.");
370439
}
371440

372441
// No other LUNs active on this target — safe to logout and delete the node record.

0 commit comments

Comments
 (0)