[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 t
From: |
Koleti, Haritha |
Subject: |
RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016 |
Date: |
Wed, 16 Jun 2021 23:28:21 +0000 |
Thank you, Ed. I have created dummy files for testing. I tried the code but was
not successful in getting the output.
We have 3 files. The command below invokes the action.
TYPE Employee1.csv|gawk -v f2=Map_Attr1.csv -v f3=LINEITEMVALUE.CSV -f
emp_dtls.awk>Emp_Dtls.csv
Emp_dtls.awk -- I changed this to your suggested code, but we are not able to
get map2[] and map3[] to work.
# emp_dtls.awk -- emit one CSV detail row per record read from standard input.
#
# Variables supplied with -v on the command line:
#   f2  attribute file:  $3 = key, $6 = attribute name, $2 = attribute value
#   f3  line-item file:  $1 = version, $2 = year, $3 = key,
#                        $4 = item name, $6 = item value
#
# Main input: $1 is the key; it is also split on "-" to fill four of the
# output columns.
#
# Both lookup files are read exactly once, in BEGIN, into arrays indexed by
# (key, name).  The previous version re-opened and scanned f2 and f3 for
# every input record, so the run time grew with
# (input records) x (lookup-file lines).
#
# A later line for the same (key, name) overwrites an earlier one, which is
# the same "last match wins" result the per-record scan produced.
# NOTE(review): keys are now matched as array subscripts, i.e. as strings;
# the old "t1==$3" test would compare numerically if both sides looked like
# numbers -- confirm the keys are never purely numeric.
BEGIN {
    FS = ","

    # map2[key, attribute name] = attribute value
    while ((getline < f2) > 0) {
        map2[$3, $6] = $2
    }
    close(f2)

    # map3[key, item name] = item value, restricted to V.0 / Year-2022 rows
    while ((getline < f3) > 0) {
        if ($1 == "V.0" && $2 == "Year-2022") {
            map3[$3, $4] = $6
        }
    }
    close(f3)
}

{
    # Attributes taken from f2.
    smgmt       = mapit(map2, "Mgmt/Union")
    scompcode   = mapit(map2, "Company Code")
    sprofcenter = mapit(map2, "Profit Center")
    sba         = mapit(map2, "Business Area")
    spayscale   = mapit(map2, "Pay Scale Type")
    sband       = mapit(map2, "BAND")
    sgrade      = mapit(map2, "Grade Level / Step")
    seegrp      = mapit(map2, "EE Group")
    seesgrp     = mapit(map2, "EE SubGroup")
    sposname    = mapit(map2, "Position Name")
    sarea       = mapit(map2, "Personal Area Description")

    # Line items taken from f3.
    steprate    = mapit(map3, "Avg Projected 1 step rate")
    salary      = mapit(map3, "Average Salary")
    incplan     = mapit(map3, "Avg Incentive plan")
    rateperhour = mapit(map3, "Avg Rate per hour")
    vachour     = mapit(map3, "Vacation hours")
    othour      = mapit(map3, "Avg OT hours")
    ytdot       = mapit(map3, "YTD OT Hours")
    illnesshr   = mapit(map3, "Avg Illness hours")
    smeal       = mapit(map3, "Avg Meals")
    coffee      = mapit(map3, "Avg Coffee money")
    alltravel   = mapit(map3, "Avg Travel / Remote Rpt / Area Maint.")
    stravel     = mapit(map3, "Avg Travel Expenses")
    standbyhr   = mapit(map3, "Avg Standby hours")
    shifthr     = mapit(map3, "Avg Shift premium hours")
    sminrate    = mapit(map3, "MIN RATE")

    # The key itself contributes four columns (its "-"-separated parts).
    split($1, a, "-")
    printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,\n",smgmt,scompcode,sprofcenter,a[1],a[2],sba,spayscale,sband,sgrade,a[3],a[4],seegrp,seesgrp,sposname,sarea,steprate,salary,incplan,rateperhour,vachour,othour,ytdot,illnesshr,smeal,coffee,alltravel,stravel,standbyhr,shifthr,sminrate);
}

# Return map[$1, tag] for the current record, or "" when no such entry exists.
# The "in" test is used so that a miss does not create an empty array element.
function mapit(map, tag) {
    return (($1, tag) in map ? map[$1, tag] : "")
}
From: Ed Morton <mortoneccc@comcast.net>
Sent: Wednesday, June 16, 2021 9:01 AM
To: Koleti, Haritha <Haritha.Koleti@pseg.com>
Cc: Pirane, Marco <Marco.Pirane@pseg.com>; bug-gawk@gnu.org; Pereira, Ricardo
<Ricardo_D.Pereira@pseg.com>
Subject: Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008
to Win 2016
***CAUTION***
***CAUTION***
***CAUTION***
This e-mail is from an EXTERNAL address. The actual sender is
(mortoneccc@comcast.net<mailto:mortoneccc@comcast.net>) which may be different
from the display address in the From: field. Be cautious of clicking on links
or opening attachments. Suspicious? Report it via the Report Phishing button.
On mobile phones, forward message to Cyber Security.
The function should of course be using "tag" rather than "idx" in the ternary:
# Look up the value stored for the current record's key ($1) and the given
# tag; yield "" when the pair is absent.  Membership is tested with "in" so
# a miss never creates an array element.
function mapit(map,tag) {
    if ( ($1,tag) in map ) {
        return map[$1,tag]
    }
    return ""
}
Some (obfuscated of course) sample input and expected output to test with would
be nice!
Ed.
On 6/16/2021 7:56 AM, Ed Morton wrote:
Just modify it in exactly the same way as I modified your other script and
populate a map array in the BEGIN section then look up that in the main body
instead of reading f2 for every input line, e.g something like (untested):
awk -v f2=foo -v f3=bar '
# Load both lookup files once in BEGIN, then resolve every output field for
# each input record with an array lookup instead of re-reading the files.
BEGIN {
    FS = ","    # input and lookup files are comma-separated

    # map2[key, attribute name] = attribute value (field 2 of f2)
    while( (getline<f2) > 0) {
        map2[$3,$6] = $2
    }
    close(f2)

    # map3[key, item name] = item value (field 6 of f3), V.0 / Year-2022 only.
    # Field 6 is the value column the per-record script reads; field 2 is the
    # year and would store "Year-2022" for every entry.
    while( (getline<f3) > 0) {
        if ( ($1=="V.0") && ($2=="Year-2022") ) {
            map3[$3,$4] = $6
        }
    }
    close(f3)
}
{
    # The remaining fields follow the same pattern.
    smgmt = mapit(map2,"Mgmt/Union")
    scompcode = mapit(map2,"Company Code")
    steprate = mapit(map3,"Avg Projected 1 step rate")
    salary = mapit(map3,"Average Salary")
}
# Return map[$1,tag], or "" when that entry does not exist.
# (As first posted this tested "idx", a variable that is never set; "tag" is
# the parameter that was meant.)
function mapit(map,tag) {
    return ( ($1,tag) in map ? map[$1,tag] : "" )
}
' file
Regards,
Ed.
On 6/16/2021 7:23 AM, Koleti, Haritha via Bug reports and all discussion about
gawk. wrote:
Good Morning Ed.
This one is running for ever. 13 minutes versus 105 minutes.
# Per-record lookup version of the employee-detail script (the slow one this
# message reports).  For every record on the main input it scans f2 and f3
# from start to end, picks out the lines whose key matches, and prints one
# 30-column CSV row.
#   f2, f3 -- lookup file names, expected to be set with -v on the command line.
# NOTE(review): because both files are re-read for each input record, the
# work grows with (input records) x (lines in f2 + lines in f3).
BEGIN {
FS=",";
}
{
# Keep the key in t1: every getline below replaces $0 and its fields.
t1=$1;
# Reset all output columns so nothing carries over from the previous record.
smgmt="";
scompcode="";
sprofcenter="";
sba="";
spayscale="";
sband="";
sgrade="";
seegrp="";
seesgrp="";
sposname="";
sarea="";
steprate="";
salary="";
incplan="";
rateperhour="";
vachour="";
othour="";
ytdot="";
illnesshr="";
smeal="";
coffee="";
alltravel="";
stravel="";
standbyhr="";
shifthr="";
sminrate="";
# Pass 1: scan all of f2.  On lines whose $3 equals the key, $6 names the
# attribute and $2 is its value; a later matching line overwrites an earlier one.
while( (getline<f2) > 0)
{
if (t1==$3)
{
if ($6=="Mgmt/Union")
{
smgmt=$2;
}
if ($6=="Company Code")
{
scompcode=$2;
}
if ($6=="Profit Center")
{
sprofcenter=$2;
}
if ($6=="Business Area")
{
sba=$2;
}
if ($6=="Pay Scale Type")
{
spayscale=$2;
}
if ($6=="BAND")
{
sband=$2;
}
if ($6=="Grade Level / Step")
{
sgrade=$2;
}
if ($6=="EE Group")
{
seegrp=$2;
}
if ($6=="EE SubGroup")
{
seesgrp=$2;
}
if ($6=="Position Name")
{
sposname=$2;
}
if ($6=="Personal Area Description")
{
sarea=$2;
}
}
}
# Closing f2 makes the next record's getline start again at the first line.
close(f2);
# Pass 2: scan all of f3.  Only lines with $1 "V.0" and $2 "Year-2022" whose
# $3 equals the key are used; $4 names the item and $6 is its value.
while( (getline<f3) > 0)
{
if ($1=="V.0")
{
if ($2=="Year-2022")
{
if (t1==$3)
{
if ($4=="Avg Projected 1 step rate")
{
steprate=$6;
}
if ($4=="Average Salary")
{
salary=$6;
}
if ($4=="Avg Incentive plan")
{
incplan=$6;
}
if ($4=="Avg Rate per hour")
{
rateperhour=$6;
}
if ($4=="Vacation hours")
{
vachour=$6;
}
if ($4=="Avg OT hours")
{
othour=$6;
}
if ($4=="YTD OT Hours")
{
ytdot=$6;
}
if ($4=="Avg Illness hours")
{
illnesshr=$6;
}
if ($4=="Avg Meals")
{
smeal=$6;
}
if ($4=="Avg Coffee money")
{
coffee=$6;
}
if ($4=="Avg Travel / Remote Rpt / Area Maint.")
{
alltravel=$6;
}
if ($4=="Avg Travel Expenses")
{
stravel=$6;
}
if ($4=="Avg Standby hours")
{
standbyhr=$6;
}
if ($4=="Avg Shift premium hours")
{
shifthr=$6;
}
if ($4=="MIN RATE")
{
sminrate=$6;
}
}
}
}
}
# Closing f3 likewise rewinds it for the next input record.
close(f3);
# The key's "-"-separated parts supply output columns 4, 5, 10 and 11.
split(t1,a,"-");
# One output row per input record; note the trailing comma before the newline.
printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,\n",smgmt,scompcode,sprofcenter,a[1],a[2],sba,spayscale,sband,sgrade,a[3],a[4],seegrp,seesgrp,sposname,sarea,steprate,salary,incplan,rateperhour,vachour,othour,ytdot,illnesshr,smeal,coffee,alltravel,stravel,standbyhr,shifthr,sminrate);
}
[https://www.pseg.com/images/global/email/PSEG_emailsignature_PSEGw-tag_version2.png]<http://www.pseg.com><http://www.pseg.com>
[http://facebook.com/pseg
[facebook.com]<https://urldefense.com/v3/__http:/facebook.com/pseg__;!!ITzsDw!8jMFdHJoTgEcw4FW_fLEtaQnbZuBOU8l3aIevz5hqiYp6ZH5eK1RjHiW3JYiPzxpEQ$>]<http://www.facebook.com/pseg>
[facebook.com]<https://urldefense.com/v3/__http:/www.facebook.com/pseg__;!!ITzsDw!8jMFdHJoTgEcw4FW_fLEtaQnbZuBOU8l3aIevz5hqiYp6ZH5eK1RjHiW3JZP8HmViA$>
[Twitter] <http://www.twitter.com/psegdelivers>
[twitter.com]<https://urldefense.com/v3/__http:/www.twitter.com/psegdelivers__;!!ITzsDw!8jMFdHJoTgEcw4FW_fLEtaQnbZuBOU8l3aIevz5hqiYp6ZH5eK1RjHiW3JaSg24PxQ$>
[LinkedIn] <http://www.linkedin.com/company/pseg>
[linkedin.com]<https://urldefense.com/v3/__http:/www.linkedin.com/company/pseg__;!!ITzsDw!8jMFdHJoTgEcw4FW_fLEtaQnbZuBOU8l3aIevz5hqiYp6ZH5eK1RjHiW3JblXxTkWg$>
[https://www.pseg.com/images/global/WP_LOGOgrey.png]
<http://energizepseg.com/>
[energizepseg.com]<https://urldefense.com/v3/__http:/energizepseg.com/__;!!ITzsDw!8jMFdHJoTgEcw4FW_fLEtaQnbZuBOU8l3aIevz5hqiYp6ZH5eK1RjHiW3JaJxC_BPw$>
PSEGSC
-----Original Message-----
From: Ed Morton <mortoneccc@comcast.net><mailto:mortoneccc@comcast.net>
Sent: Tuesday, June 15, 2021 1:15 PM
To: Koleti, Haritha <Haritha.Koleti@pseg.com><mailto:Haritha.Koleti@pseg.com>
Cc: Eli Zaretskii <eliz@gnu.org><mailto:eliz@gnu.org>;
arnold@skeeve.com<mailto:arnold@skeeve.com>;
wolfgang.laun@gmail.com<mailto:wolfgang.laun@gmail.com>;
bug-gawk@gnu.org<mailto:bug-gawk@gnu.org>; Pereira, Ricardo
<Ricardo_D.Pereira@pseg.com><mailto:Ricardo_D.Pereira@pseg.com>; Pirane, Marco
<Marco.Pirane@pseg.com><mailto:Marco.Pirane@pseg.com>
Subject: Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008
to Win 2016
***CAUTION******CAUTION******CAUTION***This e-mail is from an EXTERNAL address.
The actual sender is (mortoneccc@comcast.net<mailto:mortoneccc@comcast.net>)
which may be different from the display address in the From: field. Be cautious
of clicking on links or opening attachments. Suspicious? Report it via the
Report Phishing button. On mobile phones, forward message to Cyber Security.
No, just fix the ones that are extremely slow so that solves your current
problem and ensures you won’t have a similar problem next time something else
changes in your environment.
Ed Morton
On Jun 15, 2021, at 12:01 PM, Koleti, Haritha via Bug reports and all
discussion about gawk. <bug-gawk@gnu.org><mailto:bug-gawk@gnu.org> wrote:
Ed, these inefficient scripts ran in ~10 minutes on Win 2008. Do you think that,
to address this performance problem (>90 minutes on Win 2016), we have to change
all of our >100 AWK scripts?
It would be great if there is any other way that you can think of.
Thanks
Haritha
From: Ed Morton <mortoneccc@comcast.net><mailto:mortoneccc@comcast.net>
Sent: Tuesday, June 15, 2021 11:21 AM
To: Koleti, Haritha <Haritha.Koleti@pseg.com><mailto:Haritha.Koleti@pseg.com>;
Eli Zaretskii <eliz@gnu.org><mailto:eliz@gnu.org>;
arnold@skeeve.com<mailto:arnold@skeeve.com>
Cc: wolfgang.laun@gmail.com<mailto:wolfgang.laun@gmail.com>;
bug-gawk@gnu.org<mailto:bug-gawk@gnu.org>; Pereira, Ricardo
<Ricardo_D.Pereira@pseg.com><mailto:Ricardo_D.Pereira@pseg.com>; Pirane, Marco
<Marco.Pirane@pseg.com><mailto:Marco.Pirane@pseg.com>
Subject: Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008
to Win 2016
Haritha - good. The REAL root cause of your problems is simply that the script
was written extremely inefficiently. If you have any other scripts that take in
the order of minutes to run given input files of the size you reported then
those are also written extremely inefficiently and the fix is to correct those
scripts to run efficiently, not try to tune the environment such that those
scripts can run faster but still using enormous amounts of time like 10
minutes. So I'd recommend just fixing whichever scripts you have that are
taking minutes to run, if any.
Ed.
On 6/15/2021 10:13 AM, Koleti, Haritha wrote:
Ed,
This worked like a charm: <1 minute. But we have 100s of scripts. It would
really help if we could find the root cause of why this takes 10 minutes versus
90 minutes.
Thanks
Haritha
From: Ed Morton <mortoneccc@comcast.net>
Sent: Tuesday, June 15, 2021 9:05 AM
To: Koleti, Haritha <Haritha.Koleti@pseg.com>;
Eli Zaretskii <eliz@gnu.org>;
arnold@skeeve.com
Cc: wolfgang.laun@gmail.com;
bug-gawk@gnu.org;
Pereira, Ricardo <Ricardo_D.Pereira@pseg.com>;
Pirane, Marco <Marco.Pirane@pseg.com>
Subject: Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008
to Win 2016
***CAUTION***
***CAUTION***
***CAUTION***
This e-mail is from an EXTERNAL address. The actual sender is
(mortoneccc@comcast.net<mailto:mortoneccc@comcast.net><mailto:mortoneccc@comcast.net><mailto:mortoneccc@comcast.net>)
which may
The information contained in this e-mail, including any attachment(s), is
intended solely for use by the named addressee(s). If you are not the intended
recipient, or a person designated as responsible for delivering such messages
to the intended recipient, you are not authorized to disclose, copy, distribute
or retain this message, in whole or in part, without written authorization from
PSEG. This e-mail may contain proprietary, confidential or privileged
information. If you have received this message in error, please notify the
sender immediately. This notice is included in all e-mail messages leaving
PSEG. Thank you for your cooperation.
The information contained in this e-mail, including any attachment(s), is
intended solely for use by the named addressee(s). If you are not the intended
recipient, or a person designated as responsible for delivering such messages
to the intended recipient, you are not authorized to disclose, copy, distribute
or retain this message, in whole or in part, without written authorization from
PSEG. This e-mail may contain proprietary, confidential or privileged
information. If you have received this message in error, please notify the
sender immediately. This notice is included in all e-mail messages leaving
PSEG. Thank you for your cooperation.
Employee1.csv
Description: Employee1.csv
Map_Attr1.csv
Description: Map_Attr1.csv
LINEITEMVALUE1.CSV
Description: LINEITEMVALUE1.CSV
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, (continued)
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Eli Zaretskii, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/15
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Koleti, Haritha, 2021/06/15
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/15
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Koleti, Haritha, 2021/06/15
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/15
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Koleti, Haritha, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Eli Zaretskii, 2021/06/16
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016,
Koleti, Haritha <=
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/16
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Koleti, Haritha, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Ed Morton, 2021/06/16
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Koleti, Haritha, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Andrew J. Schorr, 2021/06/16
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, arnold, 2021/06/17
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Eli Zaretskii, 2021/06/15
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Eli Zaretskii, 2021/06/15
- RE: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Koleti, Haritha, 2021/06/15
- Re: [EXTERNAL] Re: Performance issues using GAWK 3.1.6 ->from Win 2008 to Win 2016, Andrew J. Schorr, 2021/06/15