Group Selection Page HELP DOC Controls Group Data Analysis Group ITDM WP76 DET Vacuum XRO EEE WP78 WP74 FXE SCS SPB MID HED SQS SXP Sample Environment Photon Commissioning Team Operation PSPO XO TS
General electronics MicroTCA EEE Electronics Lab EEE Rack Room
  MicroTCA Logbook  Not logged in ELOG logo
Message ID: 59     Entry time: 03 Dec 2013, 10:39
Author: Frank Babies 
Type: Software Changes 
Category: utcaX 
Subject: base s/w and configuration changed on all utca's 

Puppet Setup in a XFEL environment:

Basics: -     Change BIOS Settings to boot via PXE (now USB)
-    Install Ubuntu 12.04.04-server-amd64
-    Setup Lang: EN, Keyb. EN (US)
-    /etc/fstab "/ option = discard,noatime,errors=remount-ro"

-    Network: 192.168.81.xx/24 gw. 192.168.81.16
-    DNS 131.169.40.200 131.169.194.200
-    Partition: sda1 / 86GB, Partition2: sda5 / 8GB swap, option = discard,noatime
-    Enable sshd
-    Disable ipv6

net.ipv6.conf.all.disable_ipv6=1
net.ipv6.conf.default.disable_ipv6=1
net.ipv6.conf.lo.disable_ipv6=1

-    Deinstall irqbalance
-    Change swappiness from 60 to 10 (echo "vm.swappiness=10" >> /etc/sysctl.conf)
-    User:Passwd – utcaadm:xxxxx
-     %exfl_jet            ALL=(ALL:ALL) ALL  >>  /etc/sudoers
-    nagios  ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/

-     AddressFamily inet   >>  /etc/ssh/sshd_config

 

Blacklist Modules:

 -    lp
-    ppdev
-    parport_pc
-    parport
-    pcmouse
-    soundcore
-    snd
-    snd_page_alloc
-    snd_timer
-    snd_pcm
-    snd_hwdep
-    snd_hda_codec
-    snd_hda_intel

 


After reboot, set up the package sources configuration:

             Change the entry in the /etc/apt/sources.list:
-    deb http://doocspkgs.desy.de/pub/doocs precise main
-    deb-src http://doocspkgs.desy.de/pub/doocs precise main
-    # DESY Ubuntu Repository
-    deb  http://nims.desy.de/ubuntu precise main restricted universe multiverse
-    deb  http://nims.desy.de/ubuntu precise-updates main restricted universe multiverse
-    deb   http://nims.desy.de/ubuntu precise-security main restricted universe multiverse
-    deb-src  http://nims.desy.de/ubuntu precise main restricted universe multiverse
-    deb-src  http://nims.desy.de/ubuntu precise-updates main restricted universe multiverse
-    deb-src  http://nims.desy.de/ubuntu precise-security main restricted universe multiverse


Install other packages:
-    ntp, rsyslog, pciedev-dkms, upciedev-dkms, doocs-dirs-min, x1timer-dkms, doocs-x2timer-server, doocs-pcie-tools, build-essential, dkms, module-assistant, locate, postfix, nedit, emacs, [spd-adq-pci-dkms*.deb, adqupdater_0.13437_amd64.deb,libadq0_0.13437_amd64.deb]
-   nagios-nrpe-server nagios-plugins, autofs, subversion, libnss3-dev, pkg-config, smartmontools, hddtemp, sysstat, postfix, python-qt4, hwinfo, mc, ipmitool
-   preload, ethtool, nfs-common, openssh-server, openssh-client, krb5-user, libpam-krb5, acpi, lm-sensors, rrdtool, perl, gmetad_3.6.0-1ubuntu2_amd64.deb, ganglia-monitor-python_3.6.0-1ubuntu2_all.deb, ganglia-monitor_3.6.0-1ubuntu2_amd64.deb, libganglia1_3.6.0-1ubuntu2_amd64.deb, libconfuse0

 
Configure the packages:

HOSTS
-127.0.0.1       localhost
#-127.0.1.1       exflutca3
#-192.168.81.106  exflutca3.desy.de       exflutca3
#-192.168.81.107  exflutca3-mch.desy.de   exflutca3-mch
-192.168.81.200  exflutcadev.desy.de     exflutcadev
# The following lines are desirable for IPv6 capable hosts
-#::1     ip6-localhost ip6-loopback
-#fe00::0 ip6-localnet
-#ff00::0 ip6-mcastprefix
-#ff02::1 ip6-allnodes
-#ff02::2 ip6-allrouters

NTP
-    Change the configuration in "/etc/ntp.conf":
-    server ntp.desy.de
-    server ntp1.desy.de
-    server ntp2.desy.de
-    server ntp3.desy.de
-    fudge 127.127.1.0 stratum 13
-    restrict 127.0.0.1
-    restrict ::1
     STOP NTP -> ntpdate -> start ntp: "/usr/sbin/ntpd -p /var/run/ntpd.pid -g -u 106:113"

nagios:

-   Copy attached file nrpe.cfg to /etc/nagios

-   Copy rest of attached files to /usr/lib/nagios/plugins

-   Make them executable

-   run visudo and add that line:

-   nagios  ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/

 

-   service nagios-nrpe-server restart

Change the setup in check_temp.sh:

# Warning threshold
thresh_warn=80
# Critical threshold
thresh_crit=90
# Hardware to monitor
sensor=Core

!!! Make sure that the right values are set for the HDD (SSD) in /etc/nagios/nrpe.cfg

command[check_hddtemp]=sudo /usr/lib/nagios/plugins/check_hddtemp.sh /dev/sda 45 60


autofs:
-     mkdir /data /devhome
-     chmod 777 /data /devhome
-    Put the next lines into "/etc/auto.master":
-     /-    /etc/auto.data
-    /-   /etc/auto.devhome

-    Put the next line into "/etc/auto.devhome":

-    /devhome    -rw,soft,nfsvers=3  192.168.81.200:/devhome


-    Put the next line into "/etc/auto.data":
-    /data   -rw,soft,nfsvers=3   131.169.247.61:/data

-    and the last is:
-    change the timeout to 60 seconds if you want in "/etc/default/autofs"
-    restart the autofs "/etc/init.d/autofs restart"


Syslog-ng
-    Change the configuration in "/etc/syslog-ng/syslog-ng.conf":
-    Put in: udp();
 

rsyslog

- *.* @192.168.81.200 >> /etc/rsyslog.d/50-default.conf

On crates:

-   destination d_exflutcadev { udp("192.168.81.200" port(514)); };
-   log { source(s_src); filter(f_messages); destination (d_exflutcadev); };

Grub
-    /etc/default/grub
-    #GRUB_CMDLINE_LINUX_DEFAULT="pciehp.pciehp_force=1 pciehp.pciehp_debug=1"
  -    GRUB_CMDLINE_LINUX="pciehp.pciehp_force=1 pciehp.pciehp_debug=1 pcie_ports=native console=tty0 console=ttyS0,115200 rootwait rootdelay=90"
-      GRUB_TERMINAL="console"
-      GRUB_SERIAL_COMMAND="serial --unit=0"


---------
-    commands:
-    update-grub  update-grub2
-    grub-install /dev/sda
----------

PROXY

put in the lines:

http_proxy="http://exflwgs06.desy.de:3128"
https_proxy="http://exflwgs06.desy.de:3128"
ftp_proxy="http://exflwgs06.desy.de:3128"
 

in /etc/environment

 
Modules
-    /etc/modules
-    insert the modules:
-    pciedev
-    upciedev

-    x1timer

-    spd_adq_pci


--------------
-    command: depmod -a

NEW:  /etc/init/tty0.conf
# tty0 - getty
#
# This service maintains a getty on tty0 from the point the system is
# started until it is shut down again.

# Start once the rc job has stopped in runlevels 2-5, either outside a
# container or inside an lxc / lxc-libvirt container.
start on stopped rc RUNLEVEL=[2345] and (
            not-container or
            container CONTAINER=lxc or
            container CONTAINER=lxc-libvirt)

# Stop when leaving runlevels 2-5.
stop on runlevel [!2345]

# Restart getty whenever it exits; the line runs at 115200 baud on tty0.
respawn
exec /sbin/getty -8 115200 tty0

NEW: /etc/init/ttyS0.conf
# ttyS0 - getty
#
# This service maintains a getty on ttyS0 from the point the system is
# started until it is shut down again.

# Start once the rc job has stopped in runlevels 2-5, either outside a
# container or inside an lxc / lxc-libvirt container.
start on stopped rc RUNLEVEL=[2345] and (
            not-container or
            container CONTAINER=lxc or
            container CONTAINER=lxc-libvirt)

# Stop when leaving runlevels 2-5.
stop on runlevel [!2345]

# Restart getty whenever it exits; the line runs at 115200 baud on ttyS0.
respawn
exec /sbin/getty -8 115200 ttyS0

/etc/smartd.conf:

/dev/sda -a -o on -S on -s (S/../.././03|L/../../6/03) -m root -M exec /usr/share/smartmontools/smartd-runner

Add the line above to /etc/smartd.conf and start the smartd service.

and uncomment (enable) the line:  start_smartd=yes

in: /etc/default/smartmontools

 

/etc/hddtemp:

Put in the Line:

# "sudo echo ... >> file" opens the file as the calling user, not as root;
# piping into "sudo tee -a" performs the append with root privileges.
# Each entry is wrapped in single quotes so the double quotes reach the file.
# NOTE(review): "120G B" and "Serise" look like the model strings as hddtemp
# reports them - confirm against "hddtemp /dev/sda" before "correcting" them.
echo '"Samsung SSD 840 EVO 120G B" 190 C "Samsung SSD 840 EVO 120GB"' | sudo tee -a /etc/hddtemp.db > /dev/null

echo '"Samsung SSD 840 PRO Serise" 190 C "Samsung SSD 840 PRO Serise"' | sudo tee -a /etc/hddtemp.db > /dev/null

 

===============================================
Home
-    mkdir /data
-      command: tune2fs -e remount-ro /dev/sda1
 

Groups added:
-  3555 exfel
-  5478 exfl_jet

users added:
- 23081 exfel babies
- 21502 exfel ballakk
- 21370 exfel baskaran
- 8323  exfel coppola
- 19134 exfel emotuk
- 2512  exfel esenov
- 19446 exfel fernands
- 19499 exfel gessler
- 21126 exfel mdonato
- 23421 exfel sotoudin
- 23972 exfel utcaadm
- 20145 exfel abeckman


Networkcard driver “e1000e”

-    Unpack the driver tarball (tar/gzip), then run "make" and "make install".

Install scripts:
-    /usr/local/bin/myri-irq-bind.sh

Configure scripts:
 
-    myri-irq-bind.sh:
-    "crontab -e" insert the line "@reboot sleep 120 && /usr/local/bin/myri-irq-bind.sh eth0 8"
-    "crontab -e" insert the line "@reboot sleep 120 && /usr/local/bin/x2timer-task-bind.sh"
-    "crontab -e" insert the line "@reboot sleep 120 && /usr/local/bin/eth_push.sh"
-    "crontab -e" insert the line "2 0 * * 7 /sbin/fstrim -v /"
-    "chmod 755 /usr/local/bin/myri-irq-bind.sh"
-    "chmod 755 /usr/local/bin/x2timer-task-bind.sh"
-    "chmod 755 /usr/local/bin/eth_push.sh"
 

x2timer-task-bind.sh

#!/bin/bash
# Set the CPU affinity mask of the running x2timer server to 04, using the
# PID the server wrote to its PID file.
pid_file=/export/doocs/server/x2timer_server/x2timer_server.PID

# Without a PID, "taskset -p 04" would be parsed as a query for PID 04 and
# the script would appear to succeed, so fail loudly instead.
if [ ! -r "$pid_file" ]; then
	echo "x2timer-task-bind.sh: cannot read $pid_file" >&2
	exit 1
fi

pid=
read -r pid < "$pid_file"
if [ -z "$pid" ]; then
	echo "x2timer-task-bind.sh: $pid_file is empty" >&2
	exit 1
fi

taskset -p 04 "$pid"

In the config file "/etc/init/portmap.conf" I changed the option from "-w" to "-i -w" on all crates; now the x2timer comes up.

 

eth_push.sh

#!/bin/bash
# Set the RX and TX ring sizes of eth0 to 4096 via ethtool -G.
iface=eth0
ring_size=4096

/sbin/ethtool -G "$iface" rx "$ring_size" tx "$ring_size"

  

Attachment 1: myri-irq-bind.sh  1 kB  Uploaded 04 Dec 2013, 09:57  | Show | Hide all | Show all
Attachment 2: nrpe.cfg  7 kB  Uploaded 15 Dec 2014, 13:57  | Show | Hide all | Show all
Attachment 3: check_temp.sh  7 kB  Uploaded 15 Dec 2014, 13:57  | Show | Hide all | Show all
Attachment 4: check_hddtemp.sh  2 kB  Uploaded 15 Dec 2014, 13:58  | Hide | Hide all | Show all
#!/bin/bash
#
# USAGE:
# ./check_hddtemp.sh <device> <warn> <crit>
# Nagios script to get the temperature of an HDD from hddtemp
#
# You may have to let nagios run this script as root
# This is how the sudoers file looks in my debian system:
# nagios  ALL=(root) NOPASSWD:/usr/lib/nagios/plugins/check_hddtemp.sh
#
# Version 1.0

# Exit statuses reported back to Nagios. Marked readonly so that no later
# assignment can silently change what a status name means.
readonly OK=0
readonly WARNING=1
readonly CRITICAL=2
readonly UNKNOWN=3

# Print the one-line command synopsis on stdout.
usage() {
	printf '%s\n' "Usage: ./check_hddtemp.sh <device> <warn> <crit>"
}

function check_root()
{
	# Exit with $UNKNOWN unless whoami reports "root".
	# The substitution is quoted: if whoami prints nothing (e.g. it fails),
	# an unquoted expansion would leave "[ != root ]", which is a syntax
	# error that makes the check pass silently.
	if [ "$(whoami)" != "root" ]; then
		echo "UNKNOWN: please make sure script is running as root"
		exit "$UNKNOWN"
	fi
}
function check_arg()
{
	# Require exactly 3 arguments (device, warn, crit); otherwise print the
	# usage line and exit.
	# The exit status is $UNKNOWN rather than $OK: exiting 0 here would make
	# Nagios record a misconfigured check as a healthy disk.
	if [ "$#" -ne 3 ]; then
		usage
		exit "$UNKNOWN"
	fi
}
function check_device()
{
	# Exit with $UNKNOWN unless $DEVICE names a block special file.
	# $DEVICE is quoted: unquoted, an empty value turns the test into
	# "[ ! -b ]" (which is false) and a path with spaces into a syntax
	# error, and in both cases the check would pass.
	if [ ! -b "$DEVICE" ]; then
		echo "UNKNOWN: $DEVICE is not a block special file"
		exit "$UNKNOWN"
	fi
}
function check_warn_vs_crit()
{
	# Validate the thresholds held in $WARN and $CRIT; exit with $UNKNOWN
	# when they are unusable.
	local threshold

	# Reject empty or non-integer values first: "[ abc -ge 60 ]" is an error,
	# not "false", so without this guard bad thresholds would be accepted.
	for threshold in "$WARN" "$CRIT"; do
		case "$threshold" in
		'' | *[!0-9]* )
			echo "UNKNOWN: WARN and CRIT must be non-negative integers"
			exit "$UNKNOWN"
			;;
		esac
	done

	# make sure CRIT is larger than WARN
	if [ "$WARN" -ge "$CRIT" ]; then
		echo "UNKNOWN: WARN value may not be greater than or equal the CRIT value"
		exit "$UNKNOWN"
	fi
}

function init()
{
	# Run all pre-flight checks in order; each one exits the script itself
	# when its condition is not met.
	check_root
	# "$@" forwards the arguments unchanged; an unquoted $* would re-split
	# them on whitespace and distort the argument count check_arg sees.
	check_arg "$@"
	check_device
	check_warn_vs_crit
}

function get_hddtemp()
{
	# Run "$HDDTEMP $DEVICE -n" and store its output in the global $HEAT.
	# Exits with $UNKNOWN when $HDDTEMP is not executable or when the output
	# is not a plain non-negative integer.
	if [ -x "$HDDTEMP" ]; then
		HEAT=$("$HDDTEMP" "$DEVICE" -n)
		# Accept digits only. The previous pattern "[0-9]*" matched anything
		# that merely started with a digit, which later broke the integer
		# comparisons in check_heat.
		case "$HEAT" in
		'' | *[!0-9]* )
			echo "UNKNOWN: Could not get temperature from: $DEVICE"
			exit "$UNKNOWN"
			;;
		esac
	else
		echo "UNKNOWN: cannot execute $HDDTEMP"
		exit "$UNKNOWN"
	fi
}
function check_heat()
{
	# Compare $HEAT with $WARN and $CRIT, print the matching status line and
	# exit with the corresponding status ($OK, $WARNING, $CRITICAL).
	# All operands are quoted: unquoted, a malformed value such as
	# "0 -o 1" is word-split into test operators and can produce a false OK.
	# If a comparison cannot be evaluated at all, the final branch reports
	# $UNKNOWN.
	if [ "$HEAT" -lt "$WARN" ]; then
		echo "OK: Temperature is below warn treshold ($DEVICE is $HEAT)"
		exit "$OK"
	elif [ "$HEAT" -lt "$CRIT" ]; then
		echo "WARNING: Temperature is above warn treshold ($DEVICE is $HEAT)"
		exit "$WARNING"
	elif [ "$HEAT" -ge "$CRIT" ]; then
		echo "CRITICAL: Temperature is above crit treshold ($DEVICE is $HEAT)"
		exit "$CRITICAL"
	else
		echo "UNKNOWN: This error message should never occur, if it does happen anyway, get a new cup of coffee and fix the code :)"
		exit "$UNKNOWN"
	fi
}

# -- Main -- #

# Path of the hddtemp binary that get_hddtemp executes.
HDDTEMP=/usr/sbin/hddtemp
# Positional parameters: <device> <warn> <crit>
DEVICE=$1
WARN=$2
CRIT=$3

# "$@" hands the arguments to init exactly as received; an unquoted $*
# would re-split them on whitespace before the argument count is checked.
init "$@"
get_hddtemp
check_heat
ELOG V3.1.4-7c3fd00