Group Selection Page HELP Controls Group ITDM DET Vacuum WP78 FXE SPB HED SQS SXP Sample Environment PSPO XO TS Migrated2Zulip Archived2GitlabPages Archived2PDF MigratedByGroup
WP76 EEE DET_archived DET_deleted
General electronics MicroTCA EEE Electronics Lab EEE Rack Room
  MicroTCA Logbook  Not logged in ELOG logo
Message ID: 59     Entry time: 03 Dec 2013, 10:39
Author: Frank Babies 
Type: Software Changes 
Category: utcaX 
Subject: base s/w and configuration changed on all utca's 

Puppet Setup in a XFEL environment:

Basics: -     Change BIOS Settings to boot via PXE (now USB)
-    Install Ubuntu 12.04.04-server-amd64
-    Setup Lang: EN, Keyb. EN (US)
-    /etc/fstab "/ option = discard,noatime,errors=remount-ro"

-    Network: 192.168.81.xx/24 gw. 192.168.81.16
-    DNS 131.169.40.200 131.169.194.200
-    Partition: sda1 / 86GB, Partition2: sda5 / 8GB swap, option = discard,noatime
-    Enable sshd
-    Disable ipv6

net.ipv6.conf.all.disable_ipv6=1
net.ipv6.conf.default.disable_ipv6=1
net.ipv6.conf.lo.disable_ipv6=1

-    Deinstall irqbalance
-    change swappiness from 60% to 10% (echo "vm.swappiness=10" >> /etc/sysctl.conf)
-    User:Passwd – utcaadm:xxxxx
-     %exfl_jet            ALL=(ALL:ALL) ALL  >>  /etc/sudoers
-    nagios  ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/

-     AddressFamily inet   >>  /etc/ssh/sshd_config

 

Blacklist Modules:

 -    lp
-    ppdev
-    parport_pc
-    parport
-    pcmouse
-    soundcore
-    snd
-    snd_page_alloc
-    snd_timer
-    snd_pcm
-    snd_hwdep
-    snd_hda_codec
-    snd_hda_intel

 


After reboot, set up the package sources configuration:

             Change the entry in the /etc/apt/sources.list:
-    deb http://doocspkgs.desy.de/pub/doocs precise main
-    deb-src http://doocspkgs.desy.de/pub/doocs precise main
-    # DESY Ubuntu Repository
-    deb  http://nims.desy.de/ubuntu precise main restricted universe multiverse
-    deb  http://nims.desy.de/ubuntu precise-updates main restricted universe multiverse
-    deb   http://nims.desy.de/ubuntu precise-security main restricted universe multiverse
-    deb-src  http://nims.desy.de/ubuntu precise main restricted universe multiverse
-    deb-src  http://nims.desy.de/ubuntu precise-updates main restricted universe multiverse
-    deb-src  http://nims.desy.de/ubuntu precise-security main restricted universe multiverse


Install other packages:
-    ntp, rsyslog, pciedev-dkms, upciedev-dkms, doocs-dirs-min, x1timer-dkms, doocs-x2timer-server, doocs-pcie-tools, build-essential, dkms, module-assistant, locate, postfix, nedit, emacs, [spd-adq-pci-dkms*.deb, adqupdater_0.13437_amd64.deb,libadq0_0.13437_amd64.deb]
-   nagios-nrpe-server nagios-plugins, autofs, subversion, libnss3-dev, pkg-config, smartmontools, hddtemp, sysstat, postfix, python-qt4, hwinfo, mc, ipmitool
-   preload, ethtool, nfs-common, openssh-server, openssh-client, krb5-user, libpam-krb5, acpi, lm-sensors, rrdtool, perl, gmetad_3.6.0-1ubuntu2_amd64.deb, ganglia-monitor-python_3.6.0-1ubuntu2_all.deb, ganglia-monitor_3.6.0-1ubuntu2_amd64.deb, libganglia1_3.6.0-1ubuntu2_amd64.deb, libconfuse0

 
Configure the packages:

HOSTS
-127.0.0.1       localhost
#-127.0.1.1       exflutca3
#-192.168.81.106  exflutca3.desy.de       exflutca3
#-192.168.81.107  exflutca3-mch.desy.de   exflutca3-mch
-192.168.81.200  exflutcadev.desy.de     exflutcadev
# The following lines are desirable for IPv6 capable hosts
-#::1     ip6-localhost ip6-loopback
-#fe00::0 ip6-localnet
-#ff00::0 ip6-mcastprefix
-#ff02::1 ip6-allnodes
-#ff02::2 ip6-allrouters

NTP
-    Configuration from  "/etc/ntp.conf" change:
-    server ntp.desy.de
-    server ntp1.desy.de
-    server ntp2.desy.de
-    server ntp3.desy.de
-    fudge 127.127.1.0 stratum 13
-    restrict 127.0.0.1
-    restrict ::1
     STOP NTP -> ntpdate -> start ntp: "/usr/sbin/ntpd -p /var/run/ntpd.pid -g -u 106:113"

nagios:

-   Copy attached file nrpe.cfg to /etc/nagios

-   Copy rest of attached files to /usr/lib/nagios/plugins

-   Make them executable

-   run visudo and add that line:

-   nagios  ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/

 

-   service nagios-nrpe-server restart

change setup in check_temp.sh:

# Warning threshold
thresh_warn=80
# Critical threshold
thresh_crit=90
# Hardware to monitor
sensor=Core

!!! Make sure that the right values are placed for the HDD (SSD) in /etc/nagios/nrpe.cfg

command[check_hddtemp]=sudo /usr/lib/nagios/plugins/check_hddtemp.sh /dev/sda 45 60


autofs:
-     mkdir /data /devhome
-     chmod 777 /data /devhome
-    put the next line in to "/etc/auto.master"
-     /-    /etc/auto.data
-    /-   /etc/auto.devhome

-    put the next line in to "/etc/auto.devhome"

-    /devhome    -rw,soft,nfsvers=3  192.168.81.200:/devhome


-    put the next line in to "/etc/auto.data"
-    /data   -rw,soft,nfsvers=3   131.169.247.61:/data

-    and the last is:
-    change the timeout to 60 seconds if you want in "/etc/default/autofs"
-    restart the autofs "/etc/init.d/autofs restart"


Syslog-ng
-    Configuration from “/etc/syslog-ng/syslog-ng.conf” change
-    Put in: udp();
 

rsyslog

- *.* @192.168.81.200 >> /etc/rsyslog.d/50-default.conf

on crates:

-   destination d_exflutcadev { udp("192.168.81.200" port(514)); };
-   log { source(s_src); filter(f_messages); destination (d_exflutcadev); };

Grub
-    /etc/default/grub
-    #GRUB_CMDLINE_LINUX_DEFAULT="pciehp.pciehp_force=1 pciehp.pciehp_debug=1"
  -    GRUB_CMDLINE_LINUX="pciehp.pciehp_force=1 pciehp.pciehp_debug=1 pcie_ports=native console=tty0 console=ttyS0,115200 rootwait rootdelay=90"
-      GRUB_TERMINAL="console"
-      GRUB_SERIAL_COMMAND="serial --unit=0"


---------
-    commands:
-    update-grub  update-grub2
-    grub-install /dev/sda
----------

PROXY

put in the lines:

http_proxy="http://exflwgs06.desy.de:3128"
https_proxy="http://exflwgs06.desy.de:3128"
ftp_proxy="http://exflwgs06.desy.de:3128"
 

in /etc/environment

 
Modules
-    /etc/modules
-    insert the modules:
-    pciedev
-    upciedev

-    x1timer

-    spd_adq_pci


--------------
-    command: depmod -a

NEW:  /etc/init/tty0.conf
# tty0 - getty
#
# This service maintains a getty on tty0 from the point the system is
# started until it is shut down again.

start on stopped rc RUNLEVEL=[2345] and (
            not-container or
            container CONTAINER=lxc or
            container CONTAINER=lxc-libvirt)

stop on runlevel [!2345]

respawn
exec /sbin/getty -8 115200 tty0

NEW: /etc/init/ttyS0.conf
# ttyS0 - getty
#
# This service maintains a getty on ttyS0 from the point the system is
# started until it is shut down again.

start on stopped rc RUNLEVEL=[2345] and (
            not-container or
            container CONTAINER=lxc or
            container CONTAINER=lxc-libvirt)

stop on runlevel [!2345]

respawn
exec /sbin/getty -8 115200 ttyS0

/etc/smartd.conf:

/dev/sda -a -o on -S on -s (S/../.././03|L/../../6/03) -m root -M exec /usr/share/smartmontools/smartd-runner

to /etc/smartd.conf and started the smartd server.

and comment out the line:  start_smartd=yes

in: /etc/default/smartmontools

 

/etc/hddtemp:

Put in the Line:

sudo echo '"Samsung SSD 840 EVO 120G B" 190 C "Samsung SSD 840 EVO 120GB"' >> /etc/hddtemp.db

sudo echo '"Samsung SSD 840 PRO Serise" 190  C  "Samsung SSD 840 PRO Serise"' >> /etc/hddtemp.db

 

===============================================
Home
-    mkdir /data
-      command: tune2fs -e remount-ro /dev/sda1
 

Groups added:
-  3555 exfel
-  5478 exfl_jet

users added:
- 23081 exfel babies
- 21502 exfel ballakk
- 21370 exfel baskaran
- 8323  exfel coppola
- 19134 exfel emotuk
- 2512  exfel esenov
- 19446 exfel fernands
- 19499 exfel gessler
- 21126 exfel mdonato
- 23421 exfel sotoudin
- 23972 exfel utcaadm
- 20145 exfel abeckman


Networkcard driver “e1000e”

-    Unpack the driver with tar and gzip, then run "make" and "make install".

Install scripts:
-    /usr/local/bin/myri-irq-bind.sh

Configure scripts:
 
-    myri-irq-bind.sh:
-    "crontab -e" insert the line "@reboot sleep 120 && /usr/local/bin/myri-irq-bind.sh eth0 8"
-    "crontab -e" insert the line "@reboot sleep 120 && /usr/local/bin/x2timer-task-bind.sh"
-    "crontab -e" insert the line "@reboot sleep 120 && /usr/local/bin/eth_push.sh"
-    "crontab -e" insert the line "2 0 * * 7 /sbin/fstrim -v /"
-    "chmod 755 /usr/local/bin/myri-irq-bind.sh"
-    "chmod 755 /usr/local/bin/x2timer-task-bind.sh"
-    "chmod 755 /usr/local/bin/eth_push.sh"
 

x2timer-task-bind.sh

#!/bin/bash
# Apply CPU affinity mask 04 to the running x2timer_server process.
# The PID is read from the server's PID file.
pid_file=/export/doocs/server/x2timer_server/x2timer_server.PID

# Without a PID, "taskset -p 04" would be parsed as a query on PID 04
# instead of a bind request, so stop with an error instead.
if ! pid=$(cat "$pid_file") || [ -z "$pid" ]; then
   echo "x2timer-task-bind.sh: no PID found in $pid_file" >&2
   exit 1
fi

taskset -p 04 "$pid"

In the config file "/etc/init/portmap.conf" I changed the option from "-w" to "-i -w" on all crates; now the x2timer comes up.

 

eth_push.sh

#!/bin/bash
# Set both the RX and the TX ring size of eth0 to 4096.
nic=eth0
ring_entries=4096
/sbin/ethtool -G "$nic" rx "$ring_entries" tx "$ring_entries"

  

Attachment 1: myri-irq-bind.sh  1 kB  Uploaded 04 Dec 2013, 09:57  | Hide | Hide all
#!/bin/bash
#set -x
#
# Write a CPU affinity mask to the interrupt(s) of a network interface.
#
# Usage: myri-irq-bind.sh INTERFACE [CPU#]
#   INTERFACE  name to look for in /proc/interrupts (e.g. eth0)
#   CPU#       integer that is written, formatted as hex, to
#              /proc/irq/<irq>/smp_affinity; 0 is written when omitted
#
# If the driver labels its interrupts "<INTERFACE>:slice-<n>", every slice
# is bound in turn. Otherwise only the first interrupt line matching
# INTERFACE is bound.
#
# Exit status: 0 if every write succeeded, 1 on usage error, when
# irqbalance is running, when no interrupt was found, or when a write failed.

if [ $# -eq 0 ]; then
   echo "usage: ${0##*/} INTERFACE [CPU#]" >&2
   exit 1
fi

eth=$1
# An omitted mask was previously formatted as 0 by printf; keep that explicit.
mask=${2:-0}

echo "Binding interface $eth"
if pid=$(pgrep irqbalance); then
   echo "irqbalance is running! Pid = $pid" >&2
   echo "it will undo anything done by this script" >&2
   echo "Please kill it and re-run this script" >&2
   exit 1
fi

slice=0
status=0
# Number of /proc/interrupts lines that mention the interface
num_slices=$(grep -c -- "${eth}" /proc/interrupts)

while :; do
   # one of the following, depending on which version of the driver is installed
   if ! irq_data=$(grep -- "${eth}:slice-${slice}" /proc/interrupts); then
      if [ "$slice" -ne 0 ]; then
         # Earlier slices were handled and there is no further one.
         exit "$status"
      fi
      if ! irq_data=$(grep -- "${eth}" /proc/interrupts); then
         echo "no interrupt found for interface $eth in /proc/interrupts" >&2
         exit 1
      fi
   fi

   # The IRQ number is the first field of the first matching line, minus ':'.
   irq=$(printf '%s\n' "$irq_data" | awk 'NR == 1 { gsub(/:/, "", $1); print $1 }')
   file="/proc/irq/${irq}/smp_affinity"

   printf "Binding slice %2d to CPU %2d: writing mask 0x%08x to %s\n" \
      "$slice" "$mask" "$mask" "$file"
   if ! printf "%x" "$mask" > "$file"; then
      echo "failed to write mask to $file" >&2
      status=1
   fi

   slice=$((slice + 1))
   if [ "$slice" -eq "$num_slices" ]; then
      exit "$status"
   fi
done
Attachment 2: nrpe.cfg  7 kB  Uploaded 15 Dec 2014, 13:57  | Hide | Hide all
#############################################################################
# Sample NRPE Config File 
# Written by: Ethan Galstad (nagios@nagios.org)
# 
# Last Modified: 11-23-2007
#
# NOTES:
# This is a sample configuration file for the NRPE daemon.  It needs to be
# located on the remote host that is running the NRPE daemon, not the host
# from which the check_nrpe client is being executed.
#############################################################################


# LOG FACILITY
# The syslog facility that should be used for logging purposes.

log_facility=daemon



# PID FILE
# The name of the file in which the NRPE daemon should write its process ID
# number.  The file is only written if the NRPE daemon is started by the root
# user and is running in standalone mode.

pid_file=/var/run/nagios/nrpe.pid



# PORT NUMBER
# Port number we should wait for connections on.
# NOTE: This must be a non-privileged port (i.e. > 1024).
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd

server_port=5666



# SERVER ADDRESS
# Address that nrpe should bind to in case there are more than one interface
# and you do not want nrpe to bind on all interfaces.
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd

#server_address=127.0.0.1



# NRPE USER
# This determines the effective user that the NRPE daemon should run as.  
# You can either supply a username or a UID.
# 
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd

nrpe_user=nagios



# NRPE GROUP
# This determines the effective group that the NRPE daemon should run as.  
# You can either supply a group name or a GID.
# 
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd

nrpe_group=nagios



# ALLOWED HOST ADDRESSES
# This is an optional comma-delimited list of IP address or hostnames 
# that are allowed to talk to the NRPE daemon.
#
# Note: The daemon only does rudimentary checking of the client's IP
# address.  I would highly recommend adding entries in your /etc/hosts.allow
# file to allow only the specified host to connect to the port
# you are running this daemon on.
#
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd

allowed_hosts=127.0.0.1,exflserv05
 


# COMMAND ARGUMENT PROCESSING
# This option determines whether or not the NRPE daemon will allow clients
# to specify arguments to commands that are executed.  This option only works
# if the daemon was configured with the --enable-command-args configure script
# option.  
#
# *** ENABLING THIS OPTION IS A SECURITY RISK! *** 
# Read the SECURITY file for information on some of the security implications
# of enabling this variable.
#
# Values: 0=do not allow arguments, 1=allow command arguments

dont_blame_nrpe=0



# COMMAND PREFIX
# This option allows you to prefix all commands with a user-defined string.
# A space is automatically added between the specified prefix string and the
# command line from the command definition.
#
# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! ***
# Usage scenario: 
# Execute restricted commands using sudo.  For this to work, you need to add
# the nagios user to your /etc/sudoers.  An example entry for allowing
# execution of the plugins might be:
#
# nagios          ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/
#
# This lets the nagios user run all commands in that directory (and only them)
# without asking for a password.  If you do this, make sure you don't give
# random users write access to that directory or its contents!

# command_prefix=/usr/bin/sudo 



# DEBUGGING OPTION
# This option determines whether or not debugging messages are logged to the
# syslog facility.
# Values: 0=debugging off, 1=debugging on

debug=0



# COMMAND TIMEOUT
# This specifies the maximum number of seconds that the NRPE daemon will
# allow plugins to finish executing before killing them off.

command_timeout=60



# CONNECTION TIMEOUT
# This specifies the maximum number of seconds that the NRPE daemon will
# wait for a connection to be established before exiting. This is sometimes
# seen where a network problem stops the SSL being established even though
# all network sessions are connected. This causes the nrpe daemons to
# accumulate, eating system resources. Do not set this too low.

connection_timeout=300



# WEAK RANDOM SEED OPTION
# This directive allows you to use SSL even if your system does not have
# a /dev/random or /dev/urandom (on purpose or because the necessary patches
# were not applied). The random number generator will be seeded from a file
# which is either a file pointed to by the environment variable $RANDFILE
# or $HOME/.rnd. If neither exists, the pseudo random number generator will
# be initialized and a warning will be issued.
# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness

#allow_weak_random_seed=1



# INCLUDE CONFIG FILE
# This directive allows you to include definitions from an external config file.

#include=<somefile.cfg>



# INCLUDE CONFIG DIRECTORY
# This directive allows you to include definitions from config files (with a
# .cfg extension) in one or more directories (with recursion).

#include_dir=<somedirectory>
#include_dir=<someotherdirectory>



# COMMAND DEFINITIONS
# Command definitions that this daemon will run.  Definitions
# are in the following format:
#
# command[<command_name>]=<command_line>
#
# When the daemon receives a request to return the results of <command_name>
# it will execute the command specified by the <command_line> argument.
#
# Unlike Nagios, the command line cannot contain macros - it must be
# typed exactly as it should be executed.
#
# Note: Any plugins that are used in the command lines must reside
# on the machine that this daemon is running on!  The examples below
# assume that you have plugins installed in a /usr/local/nagios/libexec
# directory.  Also note that you will have to modify the definitions below
# to match the argument format the plugins expect.  Remember, these are
# examples only!


# The following examples use hardcoded command arguments...

command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
command[check_sda]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200 
command[check_cputemp]=/usr/lib/nagios/plugins/check_temp.sh
command[check_sensors]=/usr/lib/nagios/plugins/check_sensors
command[check_hddtemp]=sudo /usr/lib/nagios/plugins/check_hddtemp.sh /dev/sda 45 60
command[check_ntp]=/usr/lib/nagios/plugins/check_ntp -H localhost
command[check_smart]=sudo /usr/lib/nagios/plugins/check_ide_smart -d /dev/sda -n



# The following examples allow user-supplied arguments and can
# only be used if the NRPE daemon was compiled with support for 
# command arguments *AND* the dont_blame_nrpe directive in this
# config file is set to '1'.  This poses a potential security risk, so
# make sure you read the SECURITY file before doing this.

#command[check_users]=/usr/lib/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
#command[check_load]=/usr/lib/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
#command[check_disk]=/usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
#command[check_procs]=/usr/lib/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$

#
# local configuration:
#	if you'd prefer, you can instead place directives here
include=/etc/nagios/nrpe_local.cfg

# 
# you can place your config snippets into nrpe.d/
# only snippets ending in .cfg will get included
include_dir=/etc/nagios/nrpe.d/

Attachment 3: check_temp.sh  7 kB  Uploaded 15 Dec 2014, 13:57  | Hide | Hide all
#!/bin/bash

################################################################################
#                                                                              #
#  Copyright (C) 2011 Jack-Benny Persson <jake@cyberinfo.se>                   #
#                                                                              #
#   This program is free software; you can redistribute it and/or modify       #
#   it under the terms of the GNU General Public License as published by       #
#   the Free Software Foundation; either version 2 of the License, or          #
#   (at your option) any later version.                                        #
#                                                                              #
#   This program is distributed in the hope that it will be useful,            #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of             #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
#   GNU General Public License for more details.                               #
#                                                                              #
#   You should have received a copy of the GNU General Public License          #
#   along with this program; if not, write to the Free Software                #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  #
#                                                                              #
################################################################################

###############################################################################
#                                                                             #	
# Nagios plugin to monitor CPU and M/B temperature with sensors.              #
# Written in Bash (and uses sed & awk).                                       #
# Latest version of check_temp can be found at the below URL:                 #
# https://github.com/jackbenny/check_temp                                     #
#                                                                             #
# If you are having problems getting it to work, check the instructions in    #
# the README first. It walks you through installing lm-sensors and getting    #
# it to display sensor data.                                                  #
#                                                                             #
###############################################################################

# Identification strings shown by the version and help screens
AUTHOR="(c) 2011 Jack-Benny Persson (jack-benny@cyberinfo.se)"
VERSION="Version 0.8"

# Nagios plugin exit statuses
STATE_OK=0; STATE_WARNING=1; STATE_CRITICAL=2; STATE_UNKNOWN=3

# Location of the sensors executable
SENSORPROG=/usr/bin/sensors

# Extended globs are needed for the +([0-9]) integer patterns
shopt -s extglob

#### Functions ####

# Print the script name and version banner to stdout.
# Globals: VERSION (read)
print_version()
{
	# Pass the values as arguments rather than splicing them into the format
	# string, so a '%' or backslash in $0 or $VERSION is printed literally.
	printf '\n\n%s - %s\n' "$0" "$VERSION"
}

# Print the version banner, the author line and the option summary to stdout.
# Globals: AUTHOR (read)
print_help()
{
	print_version
	# $AUTHOR is data, not a format string
	printf '%s\n' "$AUTHOR"
	printf 'Monitor temperature with the use of sensors\n'
/bin/cat <<'EOT'

Options:
-h, --help
   Print detailed help screen
-V, --version
   Print version information
-v, --verbose
   Verbose output

--sensor WORD
   Set what to monitor, for example CPU or MB (or M/B). Check sensors for the
   correct word. Default is Core.
-w, --warning INTEGER
   Exit with WARNING status if above INTEGER degres
-c, --critical INTEGER
   Exit with CRITICAL status if above INTEGER degres
EOT
}


###### MAIN ########
#
# Flow: set defaults, parse options, take one sensors reading, validate the
# thresholds, then print a one-line status and exit with the matching code.

# Built-in defaults; -w, -c and --sensor override them.
# Warning threshold
thresh_warn=60
# Critical threshold
thresh_crit=80
# Hardware to monitor
sensor=Core
# Verbosity level, raised by one for each -v
verbosity=0

# See if we have sensors program installed and can execute it
if [[ ! -x "$SENSORPROG" ]]; then
	printf "\nIt appears you don't have sensors installed in %s\n" "$SENSORPROG"
	exit $STATE_UNKNOWN
fi

# Parse command line options
while [[ -n "$1" ]]; do
   case "$1" in

       -h | --help)
           print_help
           exit $STATE_OK
           ;;

       -V | --version)
           print_version
           exit $STATE_OK
           ;;

       -v | --verbose)
           verbosity=$(( verbosity + 1 ))
           shift
           ;;

       -w | --warning)
           if [[ -z "$2" ]]; then
               # Threshold not provided
               printf '\nOption %s requires an argument' "$1"
               print_help
               exit $STATE_UNKNOWN
           elif [[ "$2" != +([0-9]) ]]; then
               # Threshold is not an integer
               printf '\nThreshold must be an integer'
               print_help
               exit $STATE_UNKNOWN
           fi
           thresh_warn=$2
           shift 2
           ;;

       -c | --critical)
           if [[ -z "$2" ]]; then
               # Threshold not provided
               printf "\nOption '%s' requires an argument" "$1"
               print_help
               exit $STATE_UNKNOWN
           elif [[ "$2" != +([0-9]) ]]; then
               # Threshold is not an integer
               printf '\nThreshold must be an integer'
               print_help
               exit $STATE_UNKNOWN
           fi
           thresh_crit=$2
           shift 2
           ;;

       -\?)
           print_help
           exit $STATE_OK
           ;;

       --sensor)
           if [[ -z "$2" ]]; then
               printf '\nOption %s requires an argument' "$1"
               print_help
               exit $STATE_UNKNOWN
           fi
           sensor=$2
           shift 2
           ;;

       *)
           printf "\nInvalid option '%s'" "$1"
           print_help
           exit $STATE_UNKNOWN
           ;;
   esac
done


# Check if a sensor were specified
if [[ -z "$sensor" ]]; then
	# No sensor to monitor were specified
	printf '\nNo sensor specified'
	print_help
	exit $STATE_UNKNOWN
fi


# Take one reading and keep its first line matching the sensor name. The
# temperature and the perfdata are both derived from this same line.
sensor_line=$("$SENSORPROG" | grep -- "$sensor" | head -n1)

# Get the temperature: the integer part of the first "+NN" value on the line.
# Taking all digits (instead of a fixed two characters) keeps one-digit and
# three-digit temperatures intact. A line without "+<digits>" leaves TEMP
# empty, which is reported as UNKNOWN further down.
TEMP=$(printf '%s\n' "$sensor_line" | sed -n 's/^[^+]*+\([0-9][0-9]*\).*/\1/p')


# Check if the thresholds have been set correctly
if [[ -z "$thresh_warn" || -z "$thresh_crit" ]]; then
	# One or both thresholds were not specified
	printf '\nThreshold not set'
	print_help
	exit $STATE_UNKNOWN
elif [[ "$thresh_crit" -lt "$thresh_warn" ]]; then
	# The warning threshold must be lower than the critical threshold
	printf '\nWarning temperature should be lower than critical'
	print_help
	exit $STATE_UNKNOWN
fi


# Verbose output
if [[ "$verbosity" -ge 1 ]]; then
   /bin/cat <<__EOT
Debugging information:
  Warning threshold: $thresh_warn
  Critical threshold: $thresh_crit
  Verbosity level: $verbosity
  Current $sensor temperature: $TEMP
__EOT
   printf '\n  Temperature lines directly from sensors:\n'
   "$SENSORPROG"
   printf '\n\n'
fi

# Get performance data for Nagios "Performance Data" field
PERFDATA=$sensor_line


# And finally check the temperature against our thresholds
if [[ "$TEMP" != +([0-9]) ]]; then
	# Temperature not found for that sensor
	printf 'No data found for that sensor (%s)\n' "$sensor"
	exit $STATE_UNKNOWN

elif [[ "$TEMP" -gt "$thresh_crit" ]]; then
	# Temperature is above critical threshold
	echo "$sensor CRITICAL - Temperature is $TEMP | $PERFDATA"
	exit $STATE_CRITICAL

elif [[ "$TEMP" -gt "$thresh_warn" ]]; then
	# Temperature is above warning threshold
	echo "$sensor WARNING - Temperature is $TEMP | $PERFDATA"
	exit $STATE_WARNING

else
	# Temperature is ok
	echo "$sensor OK - Temperature is $TEMP | $PERFDATA"
	exit $STATE_OK
fi
# Not reached: every branch above exits. Kept as a safety net.
exit $STATE_UNKNOWN
Attachment 4: check_hddtemp.sh  2 kB  Uploaded 15 Dec 2014, 13:58  | Hide | Hide all
#!/bin/bash
#
# USAGE:
# ./check_hddtemp.sh <device> <warn> <crit>
# Nagios script to get the temperature of HDD from hddtemp
#
# You may have to let nagios run this script as root
# This is how the sudoers file looks in my debian system:
# nagios  ALL=(root) NOPASSWD:/usr/lib/nagios/plugins/check_hddtemp.sh
#
# Version 1.0

# Nagios plugin exit statuses
OK=0; WARNING=1; CRITICAL=2; UNKNOWN=3

function usage()
{
	# Emit the one-line synopsis on stdout
	printf '%s\n' "Usage: ./check_hddtemp.sh <device> <warn> <crit>"
}

function check_root()
{
	# Exit with UNKNOWN unless the script runs with root privileges.
	# EUID is maintained by bash itself, so the check does not depend on
	# the output of an external command that may fail or print nothing.
	if [ "$EUID" -ne 0 ]; then
		echo "UNKNOWN: please make sure script is running as root"
		exit $UNKNOWN
	fi
}
function check_arg()
{
	# Require exactly three arguments: <device> <warn> <crit>.
	# A wrong invocation prints the usage line and exits with UNKNOWN, so
	# a misconfigured check is not reported to Nagios as OK.
	if [ $# -ne 3 ]; then
		usage
		exit $UNKNOWN
	fi
}
function check_device()
{
	# Exit with UNKNOWN unless $DEVICE is a block special file.
	# The quotes matter: with an empty $DEVICE the unquoted test collapses
	# to "[ ! -b ]", which is false, and the check would pass.
	if [ ! -b "$DEVICE" ]; then
		echo "UNKNOWN: $DEVICE is not a block special file"
		exit $UNKNOWN
	fi
}
function check_warn_vs_crit()
{
	# Exit with UNKNOWN unless $WARN and $CRIT are integers and WARN < CRIT.
	# The integer check comes first: a non-numeric value would make the
	# numeric comparison itself fail, and the check would then pass.
	local value
	for value in "$WARN" "$CRIT"; do
		# An optional leading minus sign is tolerated
		case "${value#-}" in
		'' | *[!0-9]* )
			echo "UNKNOWN: WARN and CRIT values must be integers"
			exit $UNKNOWN
			;;
		esac
	done

	# make sure CRIT is larger than WARN
	if [ "$WARN" -ge "$CRIT" ]; then
		echo "UNKNOWN: WARN value may not be greater than or equal the CRIT value"
		exit $UNKNOWN
	fi
}

function init()
{
	# Run all sanity checks in order; each one exits the script on failure.
	# "$@" hands the arguments to check_arg unsplit, so an argument that
	# contains whitespace still counts as one.
	check_root
	check_arg "$@"
	check_device
	check_warn_vs_crit
}

function get_hddtemp()
{
	# Runs "$HDDTEMP $DEVICE -n" and stores its output in $HEAT.
	# Exits with UNKNOWN if $HDDTEMP is not executable or if the output is
	# anything other than a plain run of digits. Output that only starts
	# with a digit is rejected too, because $HEAT is later used in numeric
	# comparisons.
	if [ -x "$HDDTEMP" ]; then
		HEAT=$("$HDDTEMP" "$DEVICE" -n)
		case "$HEAT" in
		'' | *[!0-9]* )
			echo "UNKNOWN: Could not get temperature from: $DEVICE"
			exit $UNKNOWN
			;;
		esac
	else
		echo "UNKNOWN: cannot execute $HDDTEMP"
		exit $UNKNOWN
	fi
}
function check_heat()
{
	# Compare $HEAT with $WARN and $CRIT, print the matching status line
	# and exit with the corresponding code. Every branch exits, so the
	# checks are written as consecutive guards.
	if [ "$HEAT" -lt "$WARN" ]; then
		echo "OK: Temperature is below warn treshold ($DEVICE is $HEAT)"
		exit $OK
	fi

	if [ "$HEAT" -lt "$CRIT" ]; then
		echo "WARNING: Temperature is above warn treshold ($DEVICE is $HEAT)"
		exit $WARNING
	fi

	if [ "$HEAT" -ge "$CRIT" ]; then
		echo "CRITICAL: Temperature is above crit treshold ($DEVICE is $HEAT)"
		exit $CRITICAL
	fi

	# Only reached when none of the comparisons above could be evaluated
	echo "UNKNOWN: This error message should never occur, if it does happen anyway, get a new cup of coffee and fix the code :)"
	exit $UNKNOWN
}

# -- Main -- #

# Path of the hddtemp binary that is queried for the drive temperature
HDDTEMP=/usr/sbin/hddtemp
# Positional arguments: <device> <warn> <crit>
DEVICE=$1
WARN=$2
CRIT=$3


# "$@" forwards the arguments unsplit, so the argument-count check inside
# init sees them exactly as they were passed to the script.
init "$@"
get_hddtemp
check_heat
ELOG V3.1.4-7c3fd00