Friday 24 April 2009

Vxvm and Emc - failed device troubleshooting and fix

If you suspect a failed EMC disk under VxVM:

try a df -k (see if any I/O errors come back)
run a vxdisk list (see if any failed devices are shown)
if failed devices are shown, you can then run the following commands:

Some useful commands;

# /opt/emc/SYMCLI/V6.4.0/bin/syminq
Device Product Device
---------------------------------------- --------------------------- ---------------------
Name Type Vendor ID Rev Ser Num Cap (KB)
---------------------------------------- --------------------------- ---------------------
.........................................truncated........................................................................................
/dev/vx/rdmp/emcpower0s2 DGC RAID 5 0219 03000016 35651584
/dev/vx/rdmp/emcpower1s2 DGC RAID 5 0219 00000015 35651584
/dev/vx/rdmp/emcpower2s2 DGC RAID 5 0219 02000015 35651584


# vxprint -g dgamlap99 -htA
Disk group: dgamlap99
......................................................................
dg dgamlap99 default default 52000 1103536135.1260.eudt0040
dm amlap99dm01 - - - - NODEVICE
dm amlap99dm02 emcpower2s2 auto 1791 71294976 -
dm amlap99dm03 emcpower1s2 auto 1791 71294976 -
...............................................................
v mantas - DISABLED ACTIVE 178233344 SELECT - fsgen
pl mantas-01 mantas DISABLED NODEVICE 178233344 CONCAT - RW
sd amlap99dm01-01 mantas-01 amlap99dm01 0 71294976 0 - NDEV
sd amlap99dm02-03 mantas-01 amlap99dm02 14680064 56614912 71294976 emcpower2 ENA
sd amlap99dm03-02 mantas-01 amlap99dm03 20971520 50323456 127909888 emcpower1 ENA



# vxdisk list | grep amlap99dm (this assumes that you already know the disk group, etc., from running vxdisk list)
emcpower1s2 auto:sliced amlap99dm03 dgamlap99 online
emcpower2s2 auto:sliced amlap99dm02 dgamlap99 online
- - amlap99dm01 dgamlap99 failed was:emcpower0s2
#

# /etc/powermt display dev=all
Pseudo name=emcpower1a
CLARiiON ID=CK200033400328 [EUDT0040]
Logical device ID=600601602E030E00CA0FF19CCEE6D811 [LUN 0]
state=alive; policy=CLAROpt; priority=0; queued-IOs=0
Owner: default=SP B, current=SP B
==============================================================================
---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
### HW Path I/O Paths Interf. Mode State Q-IOs Errors
==============================================================================
3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016010601796d2s0 SP A0 active alive 0 3
3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016910601796d2s0 SP B1 active alive 0 0
3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016110601796d2s0 SP A1 active alive 0 3
3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016810601796d2s0 SP B0 active alive 0 0

Pseudo name=emcpower2a
CLARiiON ID=CK200033400328 [EUDT0040]
Logical device ID=600601602E030E00CC0FF19CCEE6D811 [LUN 2]
state=alive; policy=CLAROpt; priority=0; queued-IOs=0
Owner: default=SP B, current=SP B
==============================================================================
---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
### HW Path I/O Paths Interf. Mode State Q-IOs Errors
==============================================================================
3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016010601796d1s0 SP A0 active alive 0 3
3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016910601796d1s0 SP B1 active alive 0 0
3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016110601796d1s0 SP A1 active alive 0 3
3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016810601796d1s0 SP B0 active alive 0 0

Pseudo name=emcpower0a
CLARiiON ID=CK200033400328 [EUDT0040]
Logical device ID=600601602E030E00CD0FF19CCEE6D811 [LUN 3]
state=alive; policy=CLAROpt; priority=0; queued-IOs=0
Owner: default=SP A, current=SP A
==============================================================================
---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
### HW Path I/O Paths Interf. Mode State Q-IOs Errors
==============================================================================
3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016010601796d0s0 SP A0 active alive 0 3
3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016910601796d0s0 SP B1 active alive 0 0
3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016110601796d0s0 SP A1 active alive 0 3
3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016810601796d0s0 SP B0 active alive 0 0




The fix could be the following:

Veritas - how to fix failed device



# vxprint -g dgamlap99 -ht

:

DM NAME DEVICE TYPE PRIVLEN PUBLEN STATE

:

dm amlap99dm01 - - - - NODEVICE

dm amlap99dm02 emcpower2s2 auto 1791 71294976 -

:

v mantas - DISABLED ACTIVE 178233344 SELECT - fsgen

pl mantas-01 mantas DISABLED NODEVICE 178233344 CONCAT - RW

sd amlap99dm01-01 mantas-01 amlap99dm01 0 71294976 0 - NDEV

sd amlap99dm02-03 mantas-01 amlap99dm02 14680064 56614912 71294976 emcpower2 ENA





# vxdisk -o alldgs list | grep amlap99dm

emcpower1s2 auto:sliced amlap99dm03 dgamlap99 online

emcpower2s2 auto:sliced amlap99dm02 dgamlap99 online

- - amlap99dm01 dgamlap99 failed was:emcpower0s2



# ./syminq



Device Product Device

---------------------------------------- --------------------------- ---------------------

Name Type Vendor ID Rev Ser Num Cap (KB)

---------------------------------------- --------------------------- ---------------------

:

/dev/vx/rdmp/emcpower0s2 DGC RAID 5 0219 03000016 35651584

/dev/vx/rdmp/emcpower1s2 DGC RAID 5 0219 00000015 35651584

/dev/vx/rdmp/emcpower2s2 DGC RAID 5 0219 02000015 35651584





# /etc/powermt display dev=all

:

Pseudo name=emcpower0a

CLARiiON ID=CK200033400328 [EUDT0040]

Logical device ID=600601602E030E00CD0FF19CCEE6D811 [LUN 3]

state=alive; policy=CLAROpt; priority=0; queued-IOs=0

Owner: default=SP A, current=SP A

==============================================================================

---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---

### HW Path I/O Paths Interf. Mode State Q-IOs Errors

==============================================================================

3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016010601796d0s0 SP A0 active alive 0 1

3072 pci@1c,600000/lpfc@1/fp@0,0 c0t5006016910601796d0s0 SP B1 active alive 0 0

3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016110601796d0s0 SP A1 active alive 0 1

3074 pci@1d,700000/lpfc@1/fp@0,0 c1t5006016810601796d0s0 SP B0 active alive 0 0





# vxdmpadm listctlr all

CTLR-NAME ENCLR-TYPE STATE ENCLR-NAME

=====================================================

emcp EMC_CLARiiON ENABLED EMC_CLARiiON0

c3 Disk ENABLED Disk



# vxdmpadm getsubpaths ctlr=emcp

NAME STATE[A] PATH-TYPE[M] DMPNODENAME ENCLR-TYPE ENCLR-NAME ATTRS

================================================================================

emcpower0c ENABLED(A) - emcpower0s2 EMC_CLARiiON EMC_CLARiiON0 -

emcpower1c ENABLED(A) - emcpower1s2 EMC_CLARiiON EMC_CLARiiON0 -

emcpower2c ENABLED(A) - emcpower2s2 EMC_CLARiiON EMC_CLARiiON0 -





# vxdisk -g dgamlap99 check emcpower0s2

emcpower0s2: Okay



# /etc/vx/bin/vxreattach





# vxmend -g dgamlap99 fix stale mantas-01



# vxmend -g dgamlap99 fix clean mantas-01



# vxvol -g dgamlap99 startall



# umount /dev/vx/dsk/dgamlap99/mantas



# /opt/VRTS/bin/fsck /dev/vx/rdsk/dgamlap99/mantas



# mount /dev/vx/dsk/dgamlap99/mantas /mantas

No comments: