安装nagios 参考:
  http://www.tuicool.com/articles/yUrqiyr
一。 环境:
      1. OEL 6.8
      2. 设置hostname 并且安装好apache
      3. 关闭selinux及iptables

二。 安装nagios服务器端:
      1.  rpm -Uvh http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
      2. yum -y install nagios nagios-plugins-all nagios-plugins-nrpe nrpe php
      3. chkconfig httpd on && chkconfig nagios on
      4. service httpd start && service nagios start
      5. 设置nagiosadmin密码:
          htpasswd -c /etc/nagios/htpasswd.users nagiosadmin
      6. 登录: 打开浏览器访问下面的地址,输入nagiosadmin的用户名和密码,在host里面应该有“localhost”这台机器

         http://{your_ip_address}/nagios


三。 安装pnp4nagios:
       1. yum install pnp4nagios rrdtool
四。配置pnp4nagios:
      1,修改nagios.cfg
[root@localhost objects]# vim /etc/nagios/nagios.cfg     //修改以下内容   
process_performance_data=1                              //由0改为1   
host_perfdata_command=process-host-perfdata             //前面的注释拿掉   
service_perfdata_command=process-service-perfdata       //注释拿掉   
enable_environment_macros=1                             //如果有注释拿掉   
     2,修改commands.cfg
         注释掉原有的process-host-perfdata和process-service-perfdata定义,然后按下面的内容重新定义
[root@localhost objects]# vim /etc/nagios/objects/commands.cfg  
define command {  
       command_name    process-service-perfdata  
       command_line    /usr/bin/perl /usr/libexec/pnp4nagios/process_perfdata.pl  
}  
  
define command {  
       command_name    process-host-perfdata  
       command_line    /usr/bin/perl /usr/libexec/pnp4nagios/process_perfdata.pl -d HOSTPERFDATA  
}


define command {
      command_name      check_nrpe
      command_line      $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
     3、修改配置文件templates.cfg
[root@localhost objects]# vim /etc/nagios/objects/templates.cfg   
define host{
                name    host-pnp
                register        0
                action_url /pnp4nagios/graph?host=$HOSTNAME$
}

define service{
                name    srv-pnp
                register        0
                action_url /pnp4nagios/graph?host=$HOSTNAME$&srv=$SERVICEDESC$
}
##用来打开绘图历史的,也就是nagios里面的小太阳图标。
     4,修改服务器配置文件localhost.cfg
[root@localhost objects]#  cat /etc/nagios/objects/localhost.cfg 
###############################################################################
# LOCALHOST.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING THIS MACHINE
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple 
#       example of how you can create configuration entries to monitor
#       the local (Linux) machine.
#
###############################################################################


###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################

# Define a host for the local machine

define host{
        use                     linux-server,host-pnp            ; Name of host template to use
                                                        ; This host definition will inherit all variables that are defined
                                                        ; in (or inherited by) the linux-server host template definition.
        host_name               localhost
        alias                   localhost
        address                 127.0.0.1
        }

###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################

# Define an optional hostgroup for Linux machines

define hostgroup{
        hostgroup_name  linux-servers ; The name of the hostgroup
        alias           Linux Servers ; Long name of the group
        members         localhost     ; Comma separated list of hosts that belong to this group
        }

###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################

# Define a service to "ping" the local machine

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             PING
        check_command                   check_ping!100.0,20%!500.0,60%
        }

# Define a service to check the disk space of the root partition
# on the local machine.  Warning if < 20% free, critical if
# < 10% free space on partition.

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             Root Partition
        check_command                   check_local_disk!20%!10%!/
        }

# Define a service to check the number of currently logged in
# users on the local machine.  Warning if > 20 users, critical
# if > 50 users.

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             Current Users
        check_command                   check_local_users!20!50
        }

# Define a service to check the number of currently running procs
# on the local machine.  Warning if > 250 processes, critical if
# > 400 users.

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             Total Processes
        check_command                   check_local_procs!250!400!RSZDT
        }

# Define a service to check the load on the local machine. 

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             Current Load
        check_command                   check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
        }


# Define a service to check the swap usage the local machine. 
# Critical if less than 10% of swap is free, warning if less than 20% is free

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             Swap Usage
        check_command                   check_local_swap!20!10
        }

# Define a service to check SSH on the local machine.
# Disable notifications for this service by default, as not all users may have SSH enabled.

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             SSH
        check_command                   check_ssh
        notifications_enabled           0
        }


# Define a service to check HTTP on the local machine.
# Disable notifications for this service by default, as not all users may have HTTP enabled.

define service{
        use                             local-service,srv-pnp         ; Name of service template to use
        host_name                       localhost
        service_description             HTTP
        check_command                   check_http
        notifications_enabled           0
        }
define service{
        use                             local-service,srv-pnp      ; Name of service template to use
        host_name                       localhost
        service_description             check_oracle_tablespace
        check_command                   check_nrpe!check_tablespace_usage!prm!system!oracle!tablespace-usage!SYSTEM
        notifications_enabled           0
        }


nrpe也安装好了
vim /etc/nagios/nrpe.cfg
allowed_hosts=127.0.0.1,192.168.80.99,192.168.80.160 ##把需要通信的地址加上
dont_blame_nrpe=1 ##Values: 0=do not allow arguments, 1=allow command arguments
command[check_tablespace_usage]=/usr/lib64/nagios/plugins/check_oracle_health --connect=prm --user=system --password=oracle --mode=tablespace-usage --environment ORACLE_HOME=/u01/app/oracle/product/11.2.0/dbhome_1 --tablespace=SYSTEM

##注意 /usr/lib64/nagios/plugins/check_oracle_health 是把check_oracle_health复制过去的。使用/etc/nagios/libexec/check_oracle_health也可以,具体可看后面的安装check_oracle_health部分。

启动nrpe
/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg  -d

##yum -y install nagios nagios-plugins-all nagios-plugins-nrpe nrpe php pnp4nagios rrdtool
## 这条命令是上面两个的集合


登录错误处理
登录http://{your_ip_address}/nagios,如果发现密码认证不对,请修改以下内容:
      vi /etc/httpd/conf.d/pnp4nagios.conf
         把 AuthUserFile /etc/nagios/passwd 修改为

          AuthUserFile /etc/nagios/htpasswd.users

修改apache对nagios安装的认证,否则web页面上输入nagiosadmin的用户名和密码后 无法登录
[root@zabbixdemo conf.d]# cat /etc/httpd/conf.d/nagios.conf 
# SAMPLE CONFIG SNIPPETS FOR APACHE WEB SERVER
# Last Modified: 11-26-2005
#
# This file contains examples of entries that need
# to be incorporated into your Apache web server
# configuration file.  Customize the paths, etc. as
# needed to fit your system.

ScriptAlias /nagios/cgi-bin/ "/usr/lib64/nagios/cgi-bin/"

<Directory "/usr/lib64/nagios/cgi-bin/">
#  SSLRequireSSL
   Options ExecCGI
   AllowOverride None
   Order allow,deny
   Allow from all
#  Order deny,allow
#  Deny from all
#  Allow from 127.0.0.1
   AuthName "Nagios Access"
   AuthType Basic
   AuthUserFile /etc/nagios/htpasswd.users
   Require valid-user
</Directory>

Alias /nagios "/usr/share/nagios/html"

<Directory "/usr/share/nagios/html">
#  SSLRequireSSL
   Options None
   AllowOverride None
   Order allow,deny
   Allow from all
#  Order deny,allow
#  Deny from all
#  Allow from 127.0.0.1
   AuthName "Nagios Access"
   AuthType Basic
   AuthUserFile /etc/nagios/htpasswd.users
   Require valid-user
</Directory>


安装check_oracle_health
1、需要先安装oracle的client,配置好tns、配置好/etc/profile
ORACLE_HOME=/u01/app/oracle/product/11.2.0/dbhome_1
LD_LIBRARY_PATH=$ORACLE_HOME/lib:/usr/lib
export ORACLE_HOME LD_LIBRARY_PATH
export PATH=$PATH:$ORACLE_HOME/bin
ORACLE_BASE=/u01/app/oracle
ORACLE_SID=prm
ORACLE_HOME=/u01/app/oracle/product/11.2.0/dbhome_1
export ORACLE_BASE ORACLE_SID ORACLE_HOME
export PATH=/usr/java/jdk1.8.0_20/bin:$PATH:$ORACLE_HOME/bin
export TNS_ADMIN=$ORACLE_HOME/network/admin

生效:
  source /etc/profile

加载lib
root:
  echo $ORACLE_HOME/lib >> /etc/ld.so.conf
  ldconfig 
2、安装必要的包
参考:
  http://blog.itpub.net/29108064/viewspace-1108306/
下载DBI
wget http://search.cpan.org/CPAN/authors/id/T/TI/TIMB/DBI-1.609.tar.gz
tar zxvf DBI-1.609.tar.gz 
cd DBI-1.609
perl Makefile.PL 
make all
make install

安装DBD-Oracle
wget http://mirrors.neusoft.edu.cn/cpan/authors/id/P/PY/PYTHIAN/DBD-Oracle-1.52.tar.gz
tar zxvf DBD-Oracle-1.52.tar.gz 
cd DBD-Oracle-1.52
perl Makefile.PL
make 
make install

安装check_oracle_health
wget http://labs.consol.de/wp-content/uploads/2009/09/check_oracle_health-1.6.3.tar.gz
tar zxvf check_oracle_health-1.6.3.tar.gz 
cd check_oracle_health-1.6.3
./configure --prefix=/etc/nagios --with-nagios-user=nagios --with-nagios-group=nagios 
make all
make install
这样/etc/nagios/libexec目录下插件 check_oracle_health 就存在了


不过为了方便,我把这个插件复制到/usr/lib64/nagios/plugins/下了,所有的check_都在这个目录下
cp /etc/nagios/libexec/check_oracle_health /usr/lib64/nagios/plugins/

测试使用命令:
/etc/nagios/libexec/check_oracle_health --connect=prm --user=system --password=oracle --mode=tnsping
/etc/nagios/libexec/check_oracle_health --connect=prm --user=system --password=oracle --mode=tablespace-usage

/usr/lib64/nagios/plugins/check_nrpe -H 127.0.0.1 -c check_tablespace_usage

/etc/nagios/libexec/check_oracle_health --connect=prm --user=system --password=oracle --mode=tablespace-usage

/usr/lib64/nagios/plugins/check_nrpe -H 192.168.80.160 -c check_tablespace_usage

/usr/lib64/nagios/plugins/check_oracle_health --connect=prm --user=system --password=oracle --mode=tablespace-usage

[root@zabbixdemo admin]# /usr/lib64/nagios/plugins/check_nrpe -H 192.168.80.99 -c check_tablespace_usage
OK - tbs USERS usage is 0.01%
tbs UNDOTBS1 usage is 0.03%
tbs TEMP usage is 0.00%
tbs SYSTEM usage is 2.18%
tbs SYSAUX usage is 1.62%
tbs EXAMPLE usage is 0.95% | 'tbs_users_usage_pct'=0.01%;90;98 'tbs_users_usage'=4MB;29491;32112;0;32767 'tbs_users_alloc'=6MB;;;0;32767 'tbs_undotbs1_usage_pct'=0.03%;90;98 'tbs_undotbs1_usage'=9MB;29491;32112;0;32767 'tbs_undotbs1_alloc'=95MB;;;0;32767 'tbs_temp_usage_pct'=0.00%;90;98 'tbs_temp_usage'=0MB;29491;32112;0;32767 'tbs_temp_alloc'=29MB;;;0;32767 'tbs_system_usage_pct'=2.18%;90;98 'tbs_system_usage'=714MB;29491;32112;0;32767 'tbs_system_alloc'=720MB;;;0;32767 'tbs_sysaux_usage_pct'=1.62%;90;98 'tbs_sysaux_usage'=531MB;29491;32112;0;32767 'tbs_sysaux_alloc'=560MB;;;0;32767 'tbs_example_usage_pct'=0.95%;90;98 'tbs_example_usage'=310MB;29491;32112;0;32767 'tbs_example_alloc'=345MB;;;0;32767

/usr/lib64/nagios/plugins/check_oracle_health --connect=prm --user=system --password=oracle --mode=tablespace-usage --environment ORACLE_HOME=/u01/app/oracle/product/11.2.0/dbhome_1

单独指定一个表空间进行监控
dont_blame_nrpe=1

/usr/lib64/nagios/plugins/check_oracle_health --connect=prm --user=system --password=oracle --mode=tablespace-usage --environment ORACLE_HOME=/u01/app/oracle/product/11.2.0/dbhome_1 --tablespace=SYSTEM

oracle监听的配置文件:
listener.ora
SID_LIST_LISTENER =
  (SID_LIST =
  (SID_DESC =
  (SID_NAME = PLSExtProc)
  (ORACLE_HOME = /u01/app/oracle/product/11.2.0/dbhome_1)
  (PROGRAM = extproc)
  )
  (SID_DESC =
  (GLOBAL_DBNAME = prm)
  (ORACLE_HOME = /u01/app/oracle/product/11.2.0/dbhome_1) 
  (SID_NAME = prm)
  )
  )

 LISTENER =
  (DESCRIPTION_LIST =
    (DESCRIPTION =
      (ADDRESS = (PROTOCOL = TCP)(HOST = 127.0.0.1)(PORT = 1521))
      (ADDRESS = (PROTOCOL = IPC)(KEY = EXTPROC1521))
    )
  )


ADR_BASE_LISTENER = /u01/app/oracle 


tnsnames.ora
[root@zabbixdemo admin]# cat tnsnames.ora 
# tnsnames.ora Network Configuration File: /u01/app/oracle/product/11.2.0/dbhome_1/network/admin/tnsnames.ora
# Generated by Oracle configuration tools.
PRM =
  (DESCRIPTION =
    (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.80.160)(PORT = 1521))
    (CONNECT_DATA =
      (SERVER = DEDICATED)
      (SERVICE_NAME = prm)
    )
  )

注意,需要给tnsnames.ora赋予nagios,nrpe可以访问的权限。
chmod 777 tnsnames.ora 

如果nrpe.cfg中dont_blame_nrpe=0的话,nagios上使用check_oracle_health会报错。
check_oracle_health [-v] [-t <timeout>] --connect=<connect string>
--username=<username> --password=<password> --mode=<mode>
--tablespace=<tablespace>


用户属组和权限:
chmod 777 tnsnames.ora 
[root@zabbixdemo objects]# id nrpe
uid=494(nrpe) gid=489(nrpe) groups=489(nrpe),490(nagios),500(oinstall),501(dba)
[root@zabbixdemo objects]# id nagios
uid=495(nagios) gid=490(nagios) groups=490(nagios),500(oinstall),501(dba)

如果不给nrpe赋予权限的话,使用nrpe的画图出不来
如果不给nagios赋予权限,不chmod 777 tnsnames.ora 的话,nagios无法取得远程数据库的情况,执行/usr/lib64/nagios/plugins/check_nrpe -H 192.168.80.99 -c check_tablespace_usage 访问不了tns数据

配置服务启动
chkconfig httpd on
chkconfig nagios on
chkconfig xinetd on 
在/etc/rc.local中加入下面这一行,使nrpe开机自启动:
  /usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d
重启nrpe xinetd nagios命令
  kill -9 `ps -ef|grep nrpe|grep -v grep|awk '{print $2}'`;/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d;/etc/init.d/xinetd restart;service nagios reload

监控远程oracle总结:

  需要访问的那个远程oracle主机,不需要安装任何东西。只需要在nagios的server端配置好nrpe、安装好oracle client就可以监控oracle数据库了,如果需要再监控其主机,需要在其主机上安装nrpe插件。



Logo

更多推荐