#!/local/gnu/bin/perl -w ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### The Logfile parser is a research product of the Information ### Technology Laboratory of the National Institute of Standards and ### Technology (NIST). Since NIST is an agency of the Federal government, ### this software is not subject to copyright. We ask anyone who uses or ### distributes this source code to include this notice. ### ### Primary author: John Cugini / CUZ@NIST.GOV ### Version Date: 2002-mar-15 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## ## Parse standardly formatted logfile (.ulog) and generate ## any or all of these: ## ## description filename parameter ## ----------- -------- --------- ## a nicely formatted HTML file .html h+ or h- ## a strictly formatted parse file .parse p+ or p- ## userpath files for VISVIP ## --.up u+ or u- ## ## The first parameter defines the subdirectory for the website of ## interest (within the WEBLOG_DATA subtree), the second parameter ## defines the session name, and subsequent parameters control which ## files get written. ## ## E.g.: ## flud-parser.perl mysite session37 u+ p+ h- ## ## says to analyze the file named: ## $WEBLOG_DATA/mysite/sessions/session37/session37.ulog ## ## and to generate userpath files (e.g. ## $WEBLOG_DATA/mysite/sessions/session37/findcity-t1.up, ## $WEBLOG_DATA/mysite/sessions/session37/buyticket-t2.up, etc.) ## ## and a parse file, ## $WEBLOG_DATA/mysite/sessions/session37/session37.parse, ## ## but not an HTML file ## $WEBLOG_DATA/mysite/sessions/session37/session37.html ## - exactly the opposite of the defaults for file generation. ## ## ----- Special parameters for u+ parsing ----- ## ## The u+ parameter causes the parser to attempt to generate *.up files ## which will be accepted by VisVIP. To do this, the URLs in the .ulog ## file have to be associated with the URLs in the url2nn.dat file ## within the website subdirectory. E.g. given the example above, there ## must be a $WEBLOG_DATA/mysite/webstruct/url2nn.dat file that contains ## the mapping from full URLs to the nicknames used by VisVIP. ## ## However, in the usual WebVIP process for generating .ulog files, ## a copy of the actual website is made and then the site is ## instrumented. The .ulog file is generated on the instrumented ## site, but must be associated with the original website. Note ## that the url2nn.dat file represents the original website. The ## "host" and "dirsub" parameters allow you to make this association. ## ## First, the parser accepts an optional "host=" parameter, ## which is used when analyzing URLs within the .ulog file. If absent, ## the hostname information is taken from the url2nn.dat file. Also, ## the parser accepts an optional "dirsub=###" ## parameter that causes substitution of high-level directory names. It ## is assumed that there is a commonly named subtree for the original ## and instrumented site. The dirsub substitution is applied only ## to URLs which match the hostname. ## ## For example, suppose the original website (as recorded in url2nn.dat) ## is in the directory: ## ## http://operate.biz.com/external/sales/... ## ## and the copied, instrumented site (as recorded in the .ulog file) ## is under: ## ## http://develop.biz.com/smith/testing/sales/... ## ## Then, the parser would be invoked like this: ## ## flud-parser.perl mysite user1 u+ host=develop.biz.com \ ## dirsub=#/smith/testing#/external# ## ## The "#" character used to delimit the directory names is arbitrary; ## the parser will use the first character, whatever it is, as the ## delimiter. Obviously, this delimiter character must not appear ## within the directory names. ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub htmlize ### Idea here is to turn a flaky string into something nicely printable ### in HTML. Backslashed double quotes and backslashes get ### de-backslashed. The HTML metacharacters <,>,& are de-fanged. { my ($rawrec) = @_; if ($rawrec !~ m%[&><\\]%) ## short-circuit - no need to change { return ($rawrec); } $rawrec =~ s%&%&%g; $rawrec =~ s%<%<%g; $rawrec =~ s%>%>%g; $rawrec =~ s%\\\\%<>%g; $rawrec =~ s%\\"%"%g; $rawrec =~ s%(\\.)%$1%g; $rawrec =~ s%<>%\\%g; return ($rawrec); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub errmsg # write error message to file and print out and increment error-count { my ($p1, $p2) = @_; if ($p2 eq "W") { $severity = "WARNING"; } elsif ($p2 eq "E") { $severity = "ERROR"; } else { $severity = "???"; } $msg = "$severity - rec# $rec_ct: " . htmlize($p1) . "\n"; if ($gen_html) { print HTML "\n
\n$msg
\n"; } $msg =~ s%
%\n%gi; print STDERR $msg; $num_errs++; } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub errmsg_lrec # set up error message with lrec { errmsg ("$_[0]:
$lrec", "E"); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub is_leap_year # return 1 if a leap year, else 0; { my ($year) = @_; if ($year % 100) { return (($year % 4) ? 0 : 1); } # not a century year else { return (($year % 400) ? 0 : 1); } # century year } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub days_this_month # return number of days in this month { my ($year, $month) = @_; if ($month == 2 && is_leap_year($year)) { return (29); } else { return ($days_per_month[$month - 1]); } } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub mjd # return modified julian date { my ($year, $month, $day) = @_; ### convert from yyyy/mm/dd to MJD (modified Julian date) ### (Nov 17, 1858 (midnight) == 0; e.g. Nov 18, 1858, 6am = 1.25) ### assumes date validity already checked. ### go back only within Gregorian calendar */ if ($year < 1583) { return (-666666); } my $leap = is_leap_year ($year); ## complete years elapsed since 1580-jan-1 $last_complete_year = $year - 1; $yrs_passed = $last_complete_year - 1580; # approx #leap years in those years $leap_years = 1 + int ($yrs_passed / 4); ## knock out century years since 1700 $century_days = int (($last_complete_year - 1600) / 100); ## toss back in quad-century years since 1700 $quad_days = int ($century_days / 4); if ($century_days < 0) { $century_days = 0; } if ($quad_days < 0) { $quad_days = 0; } ## total days for complete prior years $yr_total = $yrs_passed*365 + $leap_years + $quad_days - $century_days; ## how many days within this year */ $these_days = 0; for ($ix=0; $ix < $month-1; $ix++) { $these_days += $days_per_month[$ix]; } $these_days += $day + ($leap && ($month>2)); ## change base from 1580/01/01 to 1858/11/17 return ($yr_total + $these_days - 101494); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub field_value # extract just the field value, not sequence count and format { my ($lkey) = @_; if ($full_field = $log_field{$lkey}) { if ($full_field =~ m%\A\d+/[A-Z]+/(.*)\Z%) { return ($1); } else { errmsg ("Invalid log_field entry found for $lkey : $full_field", "E"); } } return ("*****"); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub clean_name # get a de-quoted field value with no slashes { my ($fn) = @_; return ( ($fn =~ m%(\w+)%) ? $1 : "anon"); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub lex_val # evaluate fieldname { my ($fn) = @_; $log_field{$fn} =~ m%\A(\d+)/%; return ($1); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub compare_fields # compare field-names { lex_val($a) <=> lex_val($b); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub parse_time # parse $time_field, convert to MJD days, seconds; # then check against previous days, seconds. { if ($time_field =~ m%\A(\d+)/(\d+)/(\d+)-(\d+):(\d+):(\d+\.\d*)\Z%) { #OK to compute } else { errmsg_lrec ("Could not parse time field"); return (101); } $yyyy = $1; $mm = $2; $dd = $3; $hrs = $4; $mins = $5; $secs = $6; if ($mm < 1 || $mm > 12) { errmsg_lrec ("Invalid month in time field"); return (102); } if ($dd < 1 || $dd > days_this_month($yyyy, $mm)) { errmsg_lrec ("Invalid day in time field"); return (103); } if ($hrs < 0 || $hrs > 23) { errmsg_lrec ("Invalid hours in time field"); return (104); } if ($mins < 0 || $mins > 59) { errmsg_lrec ("Invalid minutes in time field"); return (105); } if ($secs < 0 || $secs >= 60) { errmsg_lrec ("Invalid seconds in time field"); return (106); } $log_days = mjd($yyyy, $mm, $dd); $log_secs = 3600 * $hrs + 60 * $mins + $secs; if ($log_days < $prev_days) { errmsg ("Date out of order in time field", "W"); } # could put in a fudge factor here: # elsif ($log_days == $prev_days && $log_secs < $prev_secs-1) elsif ($log_days == $prev_days && $log_secs < $prev_secs) { errmsg ("Time out of order in time field", "W"); } $prev_days = $log_days; $prev_secs = $log_secs; return (0); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub context_ok # check expected (target) context against actual context. { my ($target_context) = @_; $target_context = "/" . $target_context . "/"; $this_context = "/" . $context[$c_ptr] . "/"; return ($target_context =~ m%$this_context%) } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub prt_rec_fields # print formatted, colored logfile records { my ($color, $label, $show_time, $next_line) = @_; print HTML "
$label"; if ($show_time) { printf HTML " : (seconds=%7.3f) time=%s\n", $rel_secs, field_value("time"); } print HTML "
" . htmlize($next_line) . "\n"; } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub head_chunk # handle a header-record for some block from logfile. # check against context and re-set context. { my ($expected_context, $new_context, $color, $label) = @_; if (context_ok ($expected_context)) { $c_ptr++; $context[$c_ptr] = $new_context; if ($gen_html) { prt_rec_fields ($color, $label, 1, $the_rest); print HTML "
\n"; } } else { errmsg_lrec ("$label out of context"); } } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub tail_chunk # handle an end-record for some block from logfile. # check against context and re-set context. { my ($expected_context, $color, $label) = @_; if (context_ok ($expected_context)) { $c_ptr--; if ($gen_html) { print HTML "
\n"; prt_rec_fields ($color, $label, 1, $the_rest); } return (0); } else { errmsg_lrec ("$label out of context"); return (101); } } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub chk_4_label # check $the_rest to see if there is a leading label. # return the label and the "stuff" immediately following it. { if ($the_rest =~ s%\A([:\-\w\.]+)( +|,|=)%%) { return ($1,$2); } else { return ("*",0); } } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub look_label_list # check $the_rest to see if there is a leading label_list. { if ($the_rest =~ s%\A([:\-\w\.]+(,[:\-\w\.]+)*)( +)%%) { return (1,$1); } else { return (0,0); } } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub chk_rest # check common grammar of $the_rest after time field, according # to $rec_format. { my ($rec_format) = @_; while (1) { # get expected format for next field if ($rec_format eq "adhoc" || $rec_format eq "adhoc-event") { $targetname = "ADHOC"; $connector = "="; $val_form = "A"; $optional = "?"; } elsif ($rec_format eq "more") { # special treatment per action or event @retcode = ("OK", "more"); last; } elsif ($rec_format =~ s%\A(\w+)([=/])([A-Z]+)(\??)( +|\Z)%%) { $targetname = $1; # expected name of next field $connector = $2; # '=' for keyword, '/' for simple value $val_form = $3; # expected format of value - # L:label, A:alpha, N:number, etc $optional = $4; # '?' for optional field, null for mandatory } else { die "Invalid format: $rec_format"; } if ($optional eq "?") { if ($connector ne "=") { die "Only keyword fields can be optional: $rec_format"; } # check for no more fields remaining in log-record if ($the_rest =~ m%\A *\Z%) { @retcode = ("OK", "done"); last; } # check for next event component if ($the_rest =~ m%\A#% && $rec_format eq "adhoc-event") { @retcode = ("OK", "done"); last; } } # format established and there should be some non-blanks left in # $the_rest - check against next field from logfile. # first, establish actual fieldname if ($connector eq "=") { # keyword=value pair if ($targetname eq "ADHOC") { ($lbl, $lbl_end) = chk_4_label; if ($lbl_end eq "=") { $fieldname = $lbl; # ad hoc fieldname # make sure it's not one of the pre-defined ones. $tmp_lbl = "/" . $lbl . "/"; if ($all_kws =~ m%$tmp_lbl%) { @retcode = ("NG", "ad hoc keyword:$lbl same as pre-defined"); last; } } else { @retcode = ("NG", "Need: ad_hoc_fieldname=value at $lbl"); last; } } else { if ($the_rest =~ s%\A($targetname)=%%) { $fieldname = $1; # defined fieldname } else { if ($optional eq "?") { # apparently, this optional field is not used next; } else { @retcode = ("NG", "Missing mandatory $targetname"); last; } } } } else { # treat as single (non-keyword) value $fieldname = $targetname; #implicit fieldname } # OK, fieldname now established. Value of field should be # right up front. Append component-ID to fieldname. $fieldname = $fieldname . $comp_ID; if ($val_form eq "L") { # look for label ($lbl, $lbl_end) = chk_4_label; if ($lbl_end =~ m%\A %) { $log_field {$fieldname} = "$field_ct/L/$lbl"; $field_ct++; } else { @retcode = ("NG", "Invalid label"); last; } } elsif ($val_form eq "DL") { # look for detected label list ($got_ll, $ll) = look_label_list; if ($got_ll) { @det_list = split /,/, $ll; $this_patt = $det_pattern {$fieldname}; foreach $det_elem (@det_list) { if ($this_patt !~ m%/$det_elem/%) { $this_patt = 0; last; } } if ($this_patt) { $log_field {$fieldname} = "$field_ct/LL/$ll"; $field_ct++; } else { @retcode = ("NG", "Invalid list element in $fieldname"); last; } } else { @retcode = ("NG", "Invalid label-list for $fieldname"); last; } } elsif ($val_form eq "LL") { # look for a label list ($got_ll, $ll) = look_label_list; if ($got_ll) { $log_field {$fieldname} = "$field_ct/LL/$ll"; $field_ct++; } else { @retcode = ("NG", "Invalid label-list for $fieldname"); last; } } elsif ($val_form eq "A" || $val_form eq "WN") { # look for Alpha ($lbl, $lbl_end) = chk_4_label; if ($lbl_end =~ m%\A %) { # plain old label $log_field {$fieldname} = "$field_ct/L/$lbl"; $field_ct++; } elsif ($lbl_end eq "," || $lbl_end eq "=") { @retcode = ("NG", "Invalid alpha-label; lbl=$lbl"); last; } else { #not a label - try quoted string if ($the_rest =~ s%\A("([^"\\]|(\\["\\a-zA-Z]))*")( +)%%) { $log_field {$fieldname} = "$field_ct/A/$1"; $field_ct++; } elsif ($val_form eq "WN" && $the_rest =~ s%\A\*( +)%%) { # use default window_ID from event_header $log_field {$fieldname} = "$field_ct/WN/*"; $field_ct++; } else { @retcode = ("NG", "Invalid alpha"); last; } } } elsif ($val_form eq "BU") { # look for button data if ($the_rest =~ s%((shift-|ctl-|meta-|alt-)*\d+)( +)%%) { $log_field {$fieldname} = "$field_ct/BU/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid button"); last; } } elsif ($val_form eq "K") { # look for keystroke data if ($the_rest =~ s%\A(\S)( +)%%) { # single character $log_field {$fieldname} = "$field_ct/K/$1"; $field_ct++; } else { # look for label ($lbl, $lbl_end) = chk_4_label; if ($lbl_end =~ m%\A %) { $log_field {$fieldname} = "$field_ct/L/$lbl"; $field_ct++; } else { @retcode = ("NG", "Invalid keydata"); last; } } } elsif ($val_form eq "N") { # look for number if ($the_rest =~ s%\A([+\-]?\d+(\.\d*)?)( +)%%) { $log_field {$fieldname} = "$field_ct/N/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid number"); last; } } elsif ($val_form eq "B") { # look for boolean if ($the_rest =~ s%\A(yes|no)( +)%%) { $log_field {$fieldname} = "$field_ct/B/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid boolean"); last; } } elsif ($val_form eq "XY") { # look for XY pair of integers if ($the_rest =~ s%\A([+\-]?\d+,[+\-]?\d+)( +)%%) { $log_field {$fieldname} = "$field_ct/XY/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid XY-pair"); last; } } elsif ($val_form eq "SC") { # look for a scroll value if ($the_rest =~ s%\A(\d+\.\d*,\d+\.\d*)( +)%%) { $log_field {$fieldname} = "$field_ct/SC/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid scroll value"); last; } } elsif ($val_form eq "TX") { # look for a textbox value if ($the_rest =~ s%\A("([^"\\]|(\\["\\a-zA-Z]))*")?(:[+-]?\d+,[+-]?\d+)?( +)%%) { $qpart = (defined $1) ? $1 : ""; $cpart = (defined $4) ? $4 : ""; if ($qpart || $cpart) { $log_field {$fieldname} = "$field_ct/TX/" . $qpart . $cpart; $field_ct++; } else { @retcode = ("NG", "Missing textbox value"); last; } } else { @retcode = ("NG", "Invalid textbox value"); last; } } elsif ($val_form eq "R") { # look for a real number if ($the_rest =~ s%\A(\d+\.\d*)( +)%%) { $log_field {$fieldname} = "$field_ct/R/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid real number"); last; } } elsif ($val_form eq "3D") { # look for a triplet of signed real numbers if ($the_rest =~ s%\A([+\-]?\d+\.\d*,[+\-]?\d+\.\d*,[+\-]?\d+\.\d*)( +)%%) { $log_field {$fieldname} = "$field_ct/3D/$1"; $field_ct++; } else { @retcode = ("NG", "Invalid 3D value"); last; } } else { die "Bizarro val-form: $val_form in $rec_format"; } } # end while-loop through format if ($retcode[0] eq "NG") { errmsg ("$retcode[1] : $the_rest", "E"); } return (@retcode); } # end chk_rest ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub url_to_nn # find the nickname for a given URL { my ($url_arg) = @_; if ($url_arg =~ m%\A"(\S+)"\Z%) { #strip off quotes $url_arg = $1; } if ($url_arg =~ m%\A(\S+)#\S+\Z%) { #strip off intra-page reference $url_arg = $1; } if ($url_arg =~ m%\A(\S+)\?\S+\Z%) { #strip off parameters $url_arg = $1; } if ($url_arg =~ m%\A$hostname(\S+)\Z%io) { #strip off default hostname $url_arg = $1; # directory substitution, perhaps? if ($dir_sub) { if (index ($url_arg, $from_dir) == 0) { $url_arg = $to_dir . substr($url_arg, length($from_dir)); } } } if ($url_nn{$url_arg}) { $this_NN = $url_nn{$url_arg}; } else { errmsg ("URL=$url_arg not found in url->nn table", "E"); $this_NN = "*****"; } return (0); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub user_action # parse user_action component of event record. { ($status, $reason) = chk_rest ("event_type/L more"); if ($status ne "OK") { return ($status); } $this_event = "/" . field_value("event_type#U") . "/"; # anticipatory search for XY_location field. ($status, $reason) = chk_rest ("screen_xy=XY? window_xy=XY? webpage_xy=XY? more"); if ($status ne "OK") { return ($status); } $num_xy_fields = 0; if ($log_field{"screen_xy#U"}) { $num_xy_fields++; } if ($log_field{"window_xy#U"}) { $num_xy_fields++; } if ($log_field{"webpage_xy#U"}) { $num_xy_fields++; } if ($num_xy_fields > 1) { errmsg_lrec ("Too many XY fields in user_action"); return ("NG"); } if ("/keypress/keyrelease/pointerpress/pointerrelease/pointermove/" =~ m%$this_event% && $num_xy_fields < 1) { errmsg_lrec ("XY field missing for user_action"); return ("NG"); } if ("/keypress/keyrelease/" =~ m%$this_event%) { ($status, $reason) = chk_rest ("key=K adhoc-event"); } elsif ("/pointerpress/pointerrelease/" =~ m%$this_event%) { ($status, $reason) = chk_rest ("button=BU adhoc-event"); } elsif ($this_event eq "/pointermove/") { ($status, $reason) = chk_rest ("adhoc-event"); } elsif ($this_event eq "/six_DOF/") { ($status, $reason) = chk_rest ("shift=3D rotate=3D adhoc-event"); } elsif ($this_event =~ m%\A/((enter)|(leave))((widget)|(window))/%) { $widget_transit = ($4 eq "widget"); ($status, $reason) = chk_rest ("adhoc-event"); } else #ad hoc user event { ($status, $reason) = chk_rest ("adhoc-event"); } return ($status); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub widget_info # parse widget_info - same for this and other widget { ($status, $reason) = chk_rest ("widget_ID/A widget_type/L more"); if ($status ne "OK") { return ($status); } if ($valtype = $valtype_of {field_value ("widget_type" . $comp_ID)}) { ; # OK as is } else { $valtype = "A"; } ($status, $reason) = chk_rest ("value=$valtype? level=L? ready=L? adhoc-event"); return ($status); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub window_state # parse window_state component of event record. { ($status, $reason) = chk_rest ("window_spec/WN window_op/L more"); if ($status ne "OK") { return ($status); } $this_winop = field_value("window_op" . $comp_ID); if ($this_winop eq "move" || $this_winop eq "resize") { ($status, $reason) = chk_rest ("up_left=XY low_right=XY adhoc-event"); } elsif ($this_winop eq "open" || $this_winop eq "de_icon") { ($status, $reason) = chk_rest ("up_left=XY? low_right=XY? adhoc-event"); } else { ($status, $reason) = chk_rest ("adhoc-event"); } return ($status); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub prt_component # parse do_web_op component of event record. { my ($comp_lbl) = @_; if ($comp_ct == 0 && $gen_html) { print HTML "
    \n"; } if ($clabel = $comp_prt {$comp_lbl}) { ; # OK as is } else { $clabel = "unknown"; errmsg_lrec ("Wacky component label"); } if ($gen_html) { printf HTML "
  • #$clabel : %s\n", htmlize(substr($prev_rest, 0, length($prev_rest) - length($the_rest))); } $comp_ct++; return (0); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub write_gen_up # parse do_web_op component of event record. { if ($prev_NN ne "*****") { # OK, prev_NN looks worthy of writing to gen-file $time_NN = int (0.5 + $rel_secs - $prev_NN_time); print UP "$prev_NN $time_NN\n"; } return (0); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### sub do_web_op # parse do_web_op component of event record. { ($status, $reason) = chk_rest ("web_op_type/L more"); if ($status ne "OK") { return ($status); } $this_web_op = field_value("web_op_type" . $comp_ID); if ($this_web_op eq "print") { ($status, $reason) = chk_rest ("url=A? adhoc-event"); } elsif ($this_web_op eq "newpage") { ($status, $reason) = chk_rest ("newpage_op/L window_spec/WN url=A adhoc-event"); if ($gen_up && $open_up && $status eq "OK") { # see about generating a record for VISVIP $this_NN_op = field_value("newpage_op" . $comp_ID); $this_url = field_value("url" . $comp_ID); url_to_nn ($this_url); if ($prev_NN eq $this_NN) { if (($prev_NN_op eq "loading" || $prev_NN_op eq "request") && $this_NN_op eq "complete") { $prev_NN_time = $rel_secs; $prev_NN_op = $this_NN_op; } } else { write_gen_up(); # set up for next time thru $prev_NN = $this_NN; $prev_NN_time = $rel_secs; $prev_NN_op = $this_NN_op; } } # end gen_up } # end newpage option elsif ($this_web_op eq "page_locate") { ($status, $reason) = chk_rest ("window_spec/WN url=A more"); if ($status eq "OK") { if ($the_rest =~ m%\Ah%) { ($status, $reason) = chk_rest ("horizontal=SC vertical=SC? adhoc-event"); } else { ($status, $reason) = chk_rest ("vertical=SC adhoc-event"); } } } else { ($status, $reason) = chk_rest ("adhoc-event"); } return ($status); } ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### MAIN-LINE ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### print "\n\n\n"; @ARGV > 1 || die "Abort: need and as the first two arguments. \n"; $data_dir = $ENV{WEBLOG_DATA}; if (! defined $data_dir) { die "Abort: WEBLOG_DATA undefined. \n"; } # default is to generate HTML but not a .up file. $gen_html = 1; $gen_up = 0; $echo_parse = 0; $arg_ct = 0; $open_up = 0; $parm_host = 0; $dir_sub = 0; foreach $arg (@ARGV) { $arg_ct++; if ($arg_ct == 1) { $website_name = "$data_dir/$arg"; } elsif ($arg_ct == 2) { $session_name = $arg; } else { if ($arg eq "h-") {$gen_html = 0;} elsif ($arg eq "h+") {$gen_html = 1;} elsif ($arg eq "u-") {$gen_up = 0;} elsif ($arg eq "u+") {$gen_up = 1;} elsif ($arg eq "p-") {$echo_parse = 0;} elsif ($arg eq "p+") {$echo_parse = 1;} elsif ($arg =~ m%\Ahost=(\S+)\Z%) {$parm_host = $1;} elsif ($arg =~ m%\Adirsub=(\S+)\Z%) {$dir_sub = $1;} else { print STDERR "Strange arg: $arg \n"; } } } if ($dir_sub) { # check out dir_sub $delimit = substr ($dir_sub, 0, 1); if ($dir_sub =~ m%\A$delimit([^$delimit]+)$delimit([^$delimit]*)$delimit\Z%) { $from_dir = $1; $to_dir = $2; print STDERR "directory substitution from: $from_dir to: $to_dir\n"; } else { die "Invalid dirsub parameter. \n"; } } $session_dir = "$website_name/sessions/$session_name"; $file_pref = "$website_name/sessions/$session_name/$session_name"; open (INLOG, "<$file_pref" . ".ulog") || die "Could not open $file_pref as ulog \n"; if ($gen_html) { open (HTML, ">$file_pref" . ".html") || die "Could not open $file_pref as HTML. \n"; print HTML "\n"; print HTML "$session_name Logfile\n"; print HTML "\n"; print HTML "

    $session_name Logfile

    \n
    \n"; } if ($echo_parse) { open (PARSE, ">$file_pref" . ".parse") || die "Could not open $file_pref as PARSE. \n"; print PARSE "Parse of $session_name:\n"; } if ($gen_up) { ### First, load up url --> nn table $tfile_name = "$website_name/webstruct/url2nn.dat"; open (TFILE, $tfile_name) || die "Could not open $tfile_name as TFILE. \n"; $trec = || die "Cannot read first record of TFILE.\n"; $trec =~ m%\AHOST=(\S+)\Z% || die "Cannot find HOST in first TFILE record: $trec\n"; $hostname = "http://" . ($parm_host ? $parm_host : $1); print STDERR "Using HOST=$hostname\n"; ### generally speaking, URLs with a leading slash are to be prefaced ### with the hostname when comparing to VFILE records. while ($trec = ) { $trec =~ m%\A(\S+) --->>> (\S+)\Z% || die "Could not parse trec = $trec"; $url_nn{$1} = $2; } close (TFILE); } $task_ct = 0; # number of tasks in file $num_errs = 0; # syntax errors found so far in file $rec_ct = 0; # of input records $c_ptr = 0; # level pointer for context stack. $context[$c_ptr] = "file"; @days_per_month = (31,28,31,30,31,30,31,31,30,31,30,31); $log_color = "#0000dd"; $task_color = "#660000"; $q_color = "#008800"; $resp_color = "#005533"; $event_color = "#990055"; $note_color = "#550099"; $base_days = "empty"; $prev_days = -1; $prev_secs = -1; # all defined keywords $all_kws = "/browser_hw/browser_sw/button/generator/horizontal/key/level/low_right/procedure/ready/rotate/screen_xy/shift/sset/detected/subject/testdata/tester/time_zone/up_left/value/version/vertical/webpage_xy/website/window_xy/"; # expected value types of various widget types %valtype_of = ( "button" => "B", "radio" => "B", "checkbox" => "B", "textbox" => "TX", "menu" => "A", "slider" => "N", "scrollbarH" => "SC", "scrollbarV" => "SC", "handle" => "B", "link" => "B", ); # full labels for components %comp_prt = ( "U" => "user_action", "W" => "direct_widget", "OW" => "other_widget", "WN" => "window", "OP" => "webpage" ); %det_pattern = ("detected_U" => "/keypress/keyrelease/pointerpress/pointerrelease/pointermove/six_DOF/enterwidget/leavewidget/enterwindow/leavewindow/", "detected_W" => "/button/radio/checkbox/textbox/menu/slider/scrollbarH/scrollbarV/handle/link/", "detected_WN" => "/open/close/icon/de_icon/move/resize/raise/lower/focus/blur/", "detected_OP" => "/print/newpage:request/newpage:loading/newpage:complete/page_locate/" ); $det_pattern {"detected_OW"} = $det_pattern {"detected_W"}; # put this up here to tell chk_rest that we are not yet in # repeating component of the record. $sys_effect_ct = 0; # system component count $current_task = "NULL"; ### Finally, main read-loop here ### while ($lrec = ) { $rec_ct++; next if ($lrec =~ m%\A--%); # skip comments chomp($lrec); ## delete trailing spaces and Carriage-returns $lrec =~ s%[ \r]+\Z%%; if (! ($lrec =~ m%\A(\w+) *(.*)%)) { errmsg_lrec ("Could not find rectype in logrec."); next; } $rectype = $1; $the_rest = $2 . " "; # want trailing space as field terminator # get set to construct new hash table undef %log_field; $log_field {"rectype"} = "1/L/$rectype"; if ($rectype eq "qrec") { $field_ct = 2; # field count in record } else { # process time field if (! ($the_rest =~ m%\A(\S+) *(.*)%)) { errmsg_lrec ("Could not find time field in logrec."); next; } $time_field = $1; $the_rest = $2; if (parse_time) { next; # found a mistake } # use first time as base if ($base_days eq "empty") { $base_days = $log_days; $base_secs = $log_secs; } $rel_secs = $log_secs - $base_secs + 86400 * ($log_days - $base_days); $log_field {"time"} = "2/T/$time_field"; $log_field {"days"} = "3/I/$log_days"; $log_field {"secs"} = "4/N/$log_secs"; $field_ct = 5; # field count in record } # check recordtype against current file context $comp_ID = ""; if ($rectype eq "loghead") { head_chunk ("file", "log", $log_color, "Loghead"); ($status, $reason) = chk_rest ("subject=A version=L time_zone=N generator=A? browser_hw=A? browser_sw=A? tester=A? procedure=A? sset=A? detected_U=DL? detected_W=DL? detected_OW=DL? detected_WN=DL? detected_OP=DL? detected_undef_U=LL? detected_undef_W=LL? detected_undef_OW=LL? detected_undef_WN=LL? detected_undef_OP=LL? adhoc"); } elsif ($rectype eq "logend") { $rc = tail_chunk ("log", $log_color, "Logend"); if ($rc == 0) { $context[$c_ptr] = "eof"; } chk_rest ("adhoc"); } elsif ($rectype eq "taskhead") { head_chunk ("log", "task", $task_color, "Taskhead"); $task_ct++; chk_rest ("tasktype/A website=A testdata=A adhoc"); $current_task = field_value("tasktype"); if ($current_task eq "*****") { $current_task = "NULL"; } ## print "curtask = $current_task \n"; if ($gen_up) { if ($open_up) { errmsg_lrec ("UP file already open."); } else { $clean_task = clean_name ($current_task); $upfile = ">$session_dir/" . "$clean_task" . "-t$task_ct" . ".up"; open (UP, $upfile) || die "Could not open UP: $upfile \n"; $open_up = 1; $prev_NN_time = 0; # keep track of when you get first page. $prev_NN = "*****"; $prev_NN_op = "*****"; } } } elsif ($rectype eq "taskend") { tail_chunk ("task", $task_color, "Taskend"); if ($gen_up) { if ($open_up) { write_gen_up; $open_up = 0; close (UP); } else { errmsg_lrec ("UP file already closed."); } } chk_rest ("tasktype/A adhoc"); if ( field_value("tasktype") ne $current_task) { errmsg_lrec ("Task-ID does not match head=$current_task"); } } elsif ($rectype eq "event") { if (! context_ok ("task/log")) { errmsg_lrec ("event out of context"); next; } ($status, $reason) = chk_rest ("window/A url=A? more"); if ($status eq "OK" && $gen_html) { $tmp = field_value("url"); if ($tmp eq "*****") { $tmp = "" } else { $tmp = " url=$tmp"; } prt_rec_fields ($event_color, "Event", 1, "window=" . field_value("window") . $tmp ); } # keep track of whether a #W component is required. $widget_transit = 0; $comp_ct = 0; # component count if ($status eq "OK" && $the_rest =~ s%\A#U +%%) { $comp_ID = "#U"; $prev_rest = $the_rest; $status = user_action(); prt_component("U"); } if ($status eq "OK" && $the_rest =~ s%\A#W +%%) { $comp_ID = "#W"; $prev_rest = $the_rest; $status = widget_info(); prt_component("W"); } else { if ($widget_transit) { errmsg ("Required #W component is missing.", "E"); } } # $sys_effect_ct controls the parsing of repetitive # components. while ($status eq "OK" && $the_rest =~ s%\A#(OW|WN|OP) +%%) { $sys_field = $1; $sys_effect_ct++; $comp_ID = "#" . $sys_field . "[" . $sys_effect_ct . "]"; $prev_rest = $the_rest; if ($sys_field eq "OW") { $status = widget_info(); } elsif ($sys_field eq "WN") { $status = window_state(); } else { $status = do_web_op(); } prt_component($sys_field); } $sys_effect_ct = 0; if ($comp_ct < 1) { errmsg_lrec ("No event components found."); } else { if ($gen_html) { print HTML "
\n"; } } } elsif ($rectype eq "note") { if (! context_ok ("task/log")) { errmsg_lrec ("note out of context"); next; } ($status, $reason) = chk_rest ("author/A note_content/A more"); if ($status eq "OK" && $gen_html) { prt_rec_fields ($note_color, "Note", 1, "author=" . field_value("author") . " note_content=" . field_value("note_content") . " $the_rest"); chk_rest ("adhoc"); } } elsif ($rectype eq "qhead") { head_chunk ("log/task", "quest", $q_color, "Questionnaire"); chk_rest ("questionnaireID/A adhoc"); } elsif ($rectype eq "qend") { tail_chunk ("quest", $q_color, "Questionnaire_end"); chk_rest ("adhoc"); } elsif ($rectype eq "qrec") { if (context_ok ("quest")) { if ($gen_html) { prt_rec_fields ($resp_color, "Question", 0, $the_rest); } chk_rest ("questionID/A response/A adhoc"); } else { errmsg_lrec ("qrec out of context"); } } else { errmsg_lrec ("Unknown record-type"); } ### OK, record is as parsed as it's going to be - generate output if ($echo_parse) { print PARSE "---------------------------\n"; } # retrieve fields in order and write out as need be. foreach $l_key (sort compare_fields keys (%log_field)) { $log_field{$l_key} =~ m%\A(\d+)/([A-Z]+)/(.+)%; ## $ndx = $1; $ftype = $2; $fval = $3; if ($echo_parse) { print PARSE "$l_key => $ftype / $fval \n"; } } # end FOR } # end WHILE main-loop if ($context[$c_ptr] ne "eof") { errmsg ("Hit EOF, but no logend found.", "E"); } if ($gen_html) { print HTML "\n"; print HTML "

Number of errors detected: $num_errs

\n"; print HTML "\n"; close (HTML); } if ($echo_parse) { print PARSE "--------------\nEnd of parse of $session_name\n"; close (PARSE); } close (INLOG); if ($num_errs == 0) { print STDERR "Normal completion with no errors.\n"; } else { print STDERR "Completed with $num_errs errors.\n"; }