#!/usr/bin/python
#First things first: the shebang line above tells the shell to use python to interpret and run this script. This way, we don't need to type python in front of the script name every time we execute it.
#usage : ./runaway_frmweb_handler.py
#we run the script with the forms server OS user / EBS application owner OS user.. ex: applmgr
"""
There may be some cases, where we have forms processes left over and running for a long time without doing anything.
In most of the cases, we see them spinning on the CPU and we identify them with their huge CPU usage -- time.
This script is used to identify and kill those types of forms processes in order to decrease the unnecessary load on the application servers.
Tested on EBS R12 , but it needs to be tested more..
Script Creation Date : 24.01.2021
Last Modification Date : 27.01.2021
Lang: python
Author: Erman Arslan
Features:
Handle LD_LIBRARY_PATH environment variable
Oracle DB Connectivity
Get Apps password as an input without showing it in the command line.
Batch and interactive modes.
Connect to DB once and use that connection while iterating our 2d array.
Identifying runaway forms processes (having a process id, but not having a db session & having a high cumulative cpu time, i.e. > 2h).
Parsing ps output, storing in an 2d array.
Logging -- date + findings/actions
"""
######################################################
############## IMPORTING MODULES ##################
######################################################
"""
We import the modules that we need use in our code. import command is similar to #include in C/C++ ..
We could also have imported the required objects only.. (rather than importing the whole module) - Using from <module_name> import <names>
Note that, we also import numpy. Numpy provides a high-performance multidimensional array object, and tools for working with these arrays.
In order to have numpy in our server, we installed pip and then using pip, we installed numpy.
By using the as keyword, we give numpy an alternate name and we use that alternate name in our code.(just to make easier for us to write the name, we use np as an alternate way here..
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python get-pip.py
pip install numpy
"""
import subprocess
import os
import sys
import cx_Oracle
import numpy as np
from datetime import datetime
from datetime import date
import logging
import getpass
####Note that we handle LD_LIBRARY_PATH here: unless it already contains the instant client directory (the directory where the Oracle client libraries reside), we point it there and re-execute the script.
# Directory holding the 64-bit Oracle instant client libraries.
INSTANT_CLIENT_DIR = '/oracle_client/instantclient_12_1'


def library_path_with_instant_client(current_value):
    """Return the LD_LIBRARY_PATH value to re-exec with, or None if none is needed.

    current_value -- the present LD_LIBRARY_PATH value (None or '' when unset).

    When the instant client directory already appears in current_value, None
    is returned. Otherwise the directory is placed in front of the existing
    entries, so whatever the caller's environment already relied on is kept
    instead of being overwritten.
    """
    if current_value and INSTANT_CLIENT_DIR in current_value:
        return None
    if current_value:
        return INSTANT_CLIENT_DIR + ':' + current_value
    return INSTANT_CLIENT_DIR


# The dynamic loader reads LD_LIBRARY_PATH only at process start, so after
# changing it we must re-execute ourselves. Guarded so that the re-exec only
# happens when this file is run as a script.
if __name__ == "__main__":
    _new_library_path = library_path_with_instant_client(
        os.environ.get('LD_LIBRARY_PATH'))
    if _new_library_path is not None:
        os.environ['LD_LIBRARY_PATH'] = _new_library_path
        try:
            # Re-run through the interpreter itself, so this also works when
            # the script file has no execute permission.
            os.execv(sys.executable, [sys.executable] + sys.argv)
        except Exception as exc:
            print('Failed re-exec: %s' % exc)
            sys.exit(1)
"""
We install cx_Oracle module, as well.. pip install cx_Oracle==7.3
for EBS 12.1, we install 64 bit oracle client, as we have 32 bit oracle homes in apps nodes.. If we dont have 64 bit oracle client in place, get the following error:
cx_Oracle.DatabaseError: DPI-1047: Cannot locate a 64-bit Oracle Client library: "libclntsh.so: wrong ELF class: ELFCLASS32". See https://oracle.github.io/odpi/doc/installation.html#linux for help
instantclient-basic-linux.x64-12.1.0.2.0.zip
[root@ebstestdb oracle_client]# unzip instantclient-basic-linux.x64-12.1.0.2.0.zip
Archive: instantclient-basic-linux.x64-12.1.0.2.0.zip
inflating: instantclient_12_1/adrci
inflating: instantclient_12_1/BASIC_README
inflating: instantclient_12_1/genezi
inflating: instantclient_12_1/libclntshcore.so.12.1
inflating: instantclient_12_1/libclntsh.so.12.1
inflating: instantclient_12_1/libipc1.so
inflating: instantclient_12_1/libmql1.so
inflating: instantclient_12_1/libnnz12.so
inflating: instantclient_12_1/libocci.so.12.1
inflating: instantclient_12_1/libociei.so
inflating: instantclient_12_1/libocijdbc12.so
inflating: instantclient_12_1/libons.so
inflating: instantclient_12_1/liboramysql12.so
inflating: instantclient_12_1/ojdbc6.jar
inflating: instantclient_12_1/ojdbc7.jar
inflating: instantclient_12_1/uidrvci
inflating: instantclient_12_1/xstreams.jar
We also soft link the library cd /oracle_client/instantclient_12_1; ln -s libclntsh.so.12.1 libclntsh.so
Note that, we already imported the cx_Oracle module above..
"""
######################################################
############## FUNCTION DEFINITIONS ################
######################################################
# We define our kill, db_conn and a db_check functions just to use some functions in python :)
# Note that, we create a single connection and use that connection while iterating our 2d array..
def SIGKILL_func(forms_pid):
    """Send SIGKILL to the process whose id is given in forms_pid.

    forms_pid -- process id as a string or integer.

    The signal is sent with os.kill, so no shell is spawned and the pid text
    is never interpreted as a shell command. Values that are not a positive
    integer are refused: a pid of 0 or below would address a whole process
    group rather than one process. Problems are logged instead of raised, so
    one failed kill does not stop the caller. Returns None.
    """
    import signal

    try:
        pid = int(forms_pid)
    except (TypeError, ValueError):
        logging.error('Refusing to kill: %r is not a valid process id', forms_pid)
        return
    if pid <= 0:
        logging.error('Refusing to kill: process id %s is not positive', pid)
        return
    try:
        os.kill(pid, signal.SIGKILL)
    except OSError as exc:
        # Typically the process is already gone or belongs to another user.
        logging.warning('Could not kill process %s: %s', pid, exc)
def db_conn(apps_pass, host='ebstestdb', port='1555', service_name='TEST', user=r'APPS'):
    """Open the database connection and publish it as the global conn.

    apps_pass    -- password of the database user.
    host         -- database host name (defaults to the original hard-coded value).
    port         -- listener port (defaults to the original hard-coded value).
    service_name -- database service name (defaults to the original hard-coded value).
    user         -- database user to connect as.

    The connection is stored in the module-level name conn, which the rest of
    the script reads, and is also returned for callers that prefer to pass it
    around explicitly. Errors raised by cx_Oracle are not caught here.
    """
    global conn
    EBS_tns = cx_Oracle.makedsn(host, port, service_name=service_name)
    conn = cx_Oracle.connect(user=user, password=apps_pass, dsn=EBS_tns)
    return conn
def db_check(forms_db_pid, connection=None):
    """Tell whether a forms process may be killed, based on v$session.

    forms_db_pid -- OS process id of the forms process (string or number).
    connection   -- DB-API connection to query; defaults to the global conn.

    Returns "Not killable" when at least one row of v$session has PROCESS
    equal to the pid, and "killable" otherwise.

    The pid is passed as a string bind variable. That keeps the value out of
    the SQL text and compares text with text; a numeric literal would make
    the database convert the PROCESS column to a number, which fails on
    non-numeric values such as 'pid:tid'.
    """
    if connection is None:
        connection = conn
    cursor = connection.cursor()
    try:
        cursor.execute(
            "select PROCESS from v$session where PROCESS = :pid",
            {"pid": str(forms_db_pid)},
        )
        # One row is enough to know that a session exists.
        has_db_session = cursor.fetchone() is not None
    finally:
        cursor.close()

    if has_db_session:
        logging.info('This process has db session, so we should not kill it.')
        print("This process has db session, so we should not kill it.")
        return "Not killable"

    print("This process has no db session, so it is ok to be killed")
    logging.info('This process has no db session, so it is ok to be killed')
    return "killable"
######################################################
############## WE START HERE #######################
######################################################
#We first check our command line arguments and exit if we don't like the command line..
USAGE_TEXT = "Wrong argument given..\n Usage : runaway_frmweb_handler.py or runaway_frmweb_handler.py batch"


def parse_run_mode(argv):
    """Validate the command line and report the selected mode.

    argv -- full argument vector, script name first (normally sys.argv).

    Returns True for batch mode (the single argument "batch") and False for
    interactive mode (no arguments). For anything else the usage text is
    printed and the script exits with status 1, so a scheduler calling the
    script with a bad argument sees a failure rather than a success.
    """
    extra_args = list(argv[1:])
    if extra_args not in ([], ["batch"]):
        print(USAGE_TEXT)
        sys.exit(1)
    batch_mode = bool(extra_args)
    if batch_mode:
        print("Running in batch mode.")
    else:
        print("Running in interactive mode.")
    return batch_mode


# Only validate the real command line when run as a script.
if __name__ == "__main__":
    parse_run_mode(sys.argv)
# From this point on, findings and actions are written to
# /tmp/runaway_frmweb_handler.log. The file is opened with mode 'w', so each
# run starts with a fresh log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    filename='/tmp/runaway_frmweb_handler.log',
    filemode='w',
    encoding='utf-8',
)
logging.info('Script started')

# Ask for the APPS password without echoing it to the terminal, then open the
# database connection that is used for the rest of the run.
apps_password = getpass.getpass("Enter your APPS password: ")
db_conn(apps_password)
"""
We build our command to check Linux process, which have high cumulative cpu time..
Cumulative CPU time, "[DD-]hh:mm:ss" format. (alias time).
That cumulative CPU time we get from command corresponds to the TIME+ value that we see in top command output..
Of course, we get the process ids as well.. We use the process ids to kill those runaway processes..
Note that, we execute our command using subprocess call, we get the output and we manipulate the output array with numpy.reshape.
"""
def parse_ps_pairs(ps_text):
    """Turn 'pid cputime' lines into a list of [pid, cputime] pairs.

    ps_text -- decoded output of the ps pipeline, one process per line.

    Lines that do not consist of exactly two whitespace-separated fields are
    logged and skipped, so one odd line cannot shift every later pid/cputime
    pairing or break the reshape into two columns.
    """
    pairs = []
    for line in ps_text.splitlines():
        fields = line.split()
        if len(fields) == 2:
            pairs.append(fields)
        elif fields:
            logging.warning('Ignoring unexpected ps output line: %r', line)
    return pairs


# Collect pid and cumulative cpu time of the frmweb processes of the current
# user. Guarded so that ps is only run when this file is run as a script.
if __name__ == "__main__":
    cmd = ['ps -eo pid,cputime,euser,ucmd | grep `whoami` |grep -v grep | grep frmweb | awk {\'print $1" "$2\'}']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    process_to_kill_counter = 0
    o, e = proc.communicate()
    if e:
        # Keep a trace of pipeline errors instead of silently dropping them.
        logging.warning('ps pipeline wrote to stderr: %s', e.decode('ascii', 'replace').strip())
    ps_pairs = parse_ps_pairs(o.decode('ascii'))
    # Flat [pid, cputime, pid, cputime, ...] list, as the script has always exposed it.
    ps_array = [field for pair in ps_pairs for field in pair]
    # Taken from the number of pairs, so it is always an int.
    array_length = len(ps_pairs)
    ps_array_2d = np.reshape(ps_array, (array_length, 2))
"""
We check our array and get the process ids of the runaway forms processes.. -if there are any...
We ask if it is okay to kill those processes that we identify.
That is it.
"""
"""
Note that, if we see a 4th field, I mean if we see a value in the day field of the cumulative cpu time, then we directly consider that process runaway.
If we don't have a 4th field, we check -> if ( cputime_hour_count>1 ), and decide..
We treat a forms process with a cumulative CPU time of more than 2 hours as a runaway. We still get user's confirmation before doing anything..
"""
def cputime_runaway_reason(cputime_text):
    """Classify a ps cumulative CPU time given as '[DD-]hh:mm:ss'.

    Returns "> 1 day" when a non-zero day field is present, "> 2h" when there
    is no day field and the hour field is greater than 1, and None when the
    process is not a runaway. A value that cannot be parsed is logged and
    also yields None, so it is skipped rather than aborting the whole run.

    The fields are parsed as plain integers, so day counts above 31 are
    accepted.
    """
    days_text, dash, clock_text = str(cputime_text).strip().rpartition('-')
    try:
        days = int(days_text) if dash else 0
        hours, _minutes, _seconds = [int(part) for part in clock_text.split(':')]
    except ValueError:
        logging.warning('Skipping unparsable cputime value: %r', cputime_text)
        return None
    if days > 0:
        return "> 1 day"
    if hours > 1:
        return "> 2h"
    return None


def handle_runaway(process_id, batch_mode):
    """Check one runaway candidate against the database and kill it if approved.

    process_id -- OS process id of the candidate.
    batch_mode -- True to kill without asking, False to ask for confirmation.

    Returns True when db_check reported the process as killable (whether or
    not it was then killed) and False otherwise.
    """
    if db_check(process_id) != "killable":
        return False

    if batch_mode:
        logging.info('I m in batch mode, so approved! Killing Process id: %s', process_id)
        approved = True
    else:
        try:
            read_answer = raw_input
        except NameError:  # Python 3 has no raw_input
            read_answer = input
        try:
            answer_input = read_answer("Do you want me to kill it? Y or N :")
        except EOFError:
            # No answer available: never kill without an explicit "Y".
            answer_input = "N"
        approved = (answer_input == "Y")
        if approved:
            logging.info('Approved! Killing Process id: %s', process_id)
        else:
            logging.info('Disapproved! I will leave it running')
            print("Okay.. I will leave it running")

    if approved:
        print("Killing Process id :  %s" % process_id)
        SIGKILL_func(process_id)
    return True


# Walk through the collected frmweb processes. Guarded so that nothing is
# checked or killed unless this file is run as a script.
if __name__ == "__main__":
    try:
        if array_length <= 0:
            print("There are no forms processes running")
            logging.info('There are no forms process running, I quit.')
            sys.exit(0)
        # The command line was validated earlier: exactly one argument means batch.
        batch_mode = (len(sys.argv) == 2)
        for process_id, cputime_text in ps_array_2d:
            reason = cputime_runaway_reason(cputime_text)
            if reason is None:
                continue
            print("Found a runaway process with a cumulative cpu time %s -> %s" % (reason, process_id))
            logging.info('Found a runaway process with a cumulative cpu time %s -> %s', reason, process_id)
            if handle_runaway(process_id, batch_mode):
                process_to_kill_counter += 1
        if process_to_kill_counter == 0:
            logging.info('No runaway form processes to kill')
            print("No runaway form processes to kill")
    finally:
        # Close the database connection on every exit path.
        conn.close()