383 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			383 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| namespace W3TC;
 | |
| 
 | |
| 
 | |
| 
 | |
/**
 * Access log reader - provides statistics data from http server access log
 *
 * The access log is read backwards (newest entries first) in fixed-size
 * chunks. Every line is matched against a regexp generated from the
 * configured log format and counted as a "dynamic" or "static" request in
 * the history slot whose time range contains the entry.
 */
class UsageStatistics_Source_AccessLog {
	// number of bytes read from the access log per iteration
	const CHUNK_SIZE = 8192;

	// configuration
	private $line_regexp;

	// path of the access log to read.
	// Declared explicitly (it used to be created dynamically in the
	// constructor, which is deprecated since PHP 8.2); kept public because
	// dynamically created properties were publicly accessible
	public $accesslog_filename;

	// newest / oldest counted line, tracked only when W3TC_DEBUG is on
	private $max_line = '';
	private $max_time = 0;
	private $min_time;
	private $min_line = '';

	// running values

	// read access log after that timestamp
	private $max_already_counted_timestamp;

	// what was loaded now in this cycle
	private $max_now_counted_timestamp = null;

	// if need to read more access log chunks
	private $more_log_needed = true;

	// where data aggregated
	private $history;
	private $history_current_pos;
	private $history_current_item;
	private $history_current_timestamp_start;
	private $history_current_timestamp_end;



	/**
	 * Adds the 'access_log' section to the usage statistics summary.
	 *
	 * Totals are obtained through Util_UsageStatistics::sum() over the
	 * 'access_log' counters of $history; formatting is delegated to the
	 * Util_UsageStatistics helpers (defined outside of this file).
	 *
	 * @param array $summary summary built so far
	 * @param array $history history slots
	 * @return array $summary with the 'access_log' key set
	 */
	public static function w3tc_usage_statistics_summary_from_history( $summary, $history ) {
		$dynamic_requests_total = Util_UsageStatistics::sum( $history,
			array( 'access_log', 'dynamic_count' ) );
		$dynamic_timetaken_ms_total = Util_UsageStatistics::sum( $history,
			array( 'access_log', 'dynamic_timetaken_ms' ) );
		$static_requests_total = Util_UsageStatistics::sum( $history,
			array( 'access_log', 'static_count' ) );
		$static_timetaken_ms_total = Util_UsageStatistics::sum( $history,
			array( 'access_log', 'static_timetaken_ms' ) );

		$summary['access_log'] = array(
			'dynamic_requests_total_v' => $dynamic_requests_total,
			'dynamic_requests_total' => Util_UsageStatistics::integer(
				$dynamic_requests_total ),
			'dynamic_requests_per_second' => Util_UsageStatistics::value_per_period_seconds(
				$dynamic_requests_total, $summary ),
			'dynamic_requests_timing' => Util_UsageStatistics::integer_divideby(
				$dynamic_timetaken_ms_total, $dynamic_requests_total ),
			'static_requests_total' => Util_UsageStatistics::integer(
				$static_requests_total ),
			'static_requests_per_second' => Util_UsageStatistics::value_per_period_seconds(
				$static_requests_total, $summary ),
			'static_requests_timing' => Util_UsageStatistics::integer_divideby(
				$static_timetaken_ms_total, $static_requests_total ),
		);

		return $summary;
	}



	/**
	 * Builds the line regexp for the configured web server / log format.
	 *
	 * @param array $data array( 'webserver', 'format', 'filename' );
	 *                    any 'webserver' other than 'nginx' is handled
	 *                    as apache
	 */
	public function __construct( $data ) {
		$format = $data['format'];
		$webserver = $data['webserver'];
		$this->accesslog_filename = str_replace( '://', '/', $data['filename'] );

		if ( $webserver === 'nginx' ) {
			$line_regexp = $this->logformat_to_regexp_nginx( $format );
		} else {
			$line_regexp = $this->logformat_to_regexp_apache( $format );
		}

		$this->line_regexp = apply_filters( 'w3tc_ustats_access_log_format_regexp',
			$line_regexp );
	}



	/**
	 * Fills the 'access_log' counters of the history slots from the log.
	 *
	 * Only entries newer than the timestamp stored in the
	 * 'w3tc_stats_history_access_log' site option (or than the start of the
	 * oldest history slot, whichever is later) are counted. The newest
	 * counted timestamp is stored back to that option.
	 *
	 * @param array $history history slots, each having 'timestamp_start'
	 *                       and 'timestamp_end'; index 0 is the oldest slot
	 * @return array history with 'access_log' counters; the unmodified
	 *               $history when it is empty or the log cannot be opened
	 */
	public function w3tc_usage_statistics_history_set( $history ) {
		// nothing to aggregate into; without this guard a bogus slot
		// with index -1 would be created by setup_history_item()
		if ( empty( $history ) ) {
			return $history;
		}

		$this->max_already_counted_timestamp = (int)get_site_option( 'w3tc_stats_history_access_log' );
		if ( isset( $history[0]['timestamp_start'] ) &&
				$history[0]['timestamp_start'] > $this->max_already_counted_timestamp ) {
			$this->max_already_counted_timestamp = $history[0]['timestamp_start'] - 1;
		}

		$this->history = $history;
		$this->min_time = time();
		$this->setup_history_item( count( $history ) - 1 );

		$h = @fopen( $this->accesslog_filename, 'rb' );
		if ( !$h ) {
			error_log( 'Failed to open access log for usage statisics collection' );
			return $history;
		}

		fseek( $h, 0, SEEK_END );
		$pos = ftell( $h );
		$unparsed_head = '';

		// walk from the end of file towards its beginning
		while ( $pos > 0 && $this->more_log_needed ) {
			// never read more than what is left before $pos, otherwise the
			// final chunk overlaps data that has been parsed already
			$chunk_length = min( self::CHUNK_SIZE, $pos );
			$pos -= $chunk_length;
			fseek( $h, $pos );

			$s = fread( $h, $chunk_length );
			if ( $s === false ) {
				break;
			}

			// first line of the chunk is probably cut by the chunk boundary,
			// unless the chunk starts at the very beginning of the file
			$unparsed_head = $this->parse_string( $s . $unparsed_head, $pos > 0 );
		}

		fclose( $h );

		if ( $pos <= 0 ) {
			$this->more_log_needed = false;
		}

		if ( defined( 'W3TC_DEBUG' ) && W3TC_DEBUG ) {
			Util_Debug::log( 'time',
				"period " .
				date( DATE_ATOM, $this->max_already_counted_timestamp ) . ' - ' .
				date( DATE_ATOM, $this->max_now_counted_timestamp ) . "\n" .
				"min line: " . $this->min_line . "\n" .
				"max line: " . $this->max_line );
		}

		if ( !is_null( $this->max_now_counted_timestamp ) ) {
			update_site_option( 'w3tc_stats_history_access_log',
				$this->max_now_counted_timestamp );
		}

		return $this->history;
	}



	/**
	 * Makes history slot $pos the one counters are currently added to,
	 * creating its zeroed 'access_log' counters when missing.
	 *
	 * @param int $pos index in $this->history
	 */
	private function setup_history_item( $pos ) {
		$this->history_current_pos = $pos;

		if ( !isset( $this->history[$pos]['access_log'] ) ) {
			$this->history[$pos]['access_log'] = array(
				'dynamic_count' => 0,
				'dynamic_timetaken_ms' => 0,
				'static_count' => 0,
				'static_timetaken_ms' => 0,
			);
		}

		// reference, so that increments land in $this->history directly
		$this->history_current_item = &$this->history[$pos]['access_log'];
		$this->history_current_timestamp_start = $this->history[$pos]['timestamp_start'];
		$this->history_current_timestamp_end = $this->history[$pos]['timestamp_end'];
	}



	/**
	 * Splits a buffer into lines and pushes them, last line first.
	 *
	 * Text after the last line break is not terminated and is ignored.
	 *
	 * @param string $s buffer to parse
	 * @param bool $skip_first_line when true, the text up to and including
	 *                              the first line break is not parsed but
	 *                              returned, since it may be a line cut by
	 *                              the chunk boundary
	 * @return string unparsed head of the buffer ('' when nothing skipped)
	 */
	private function parse_string( $s, $skip_first_line ) {
		$unparsed_head = '';

		if ( $skip_first_line ) {
			$head_length = strcspn( $s, "\r\n" );
			if ( $head_length >= strlen( $s ) ) {
				// no line break at all: the whole buffer is a fragment of
				// one line, keep it until the beginning of that line is read
				return $s;
			}

			$unparsed_head = substr( $s, 0, $head_length + 1 );
			$s = (string)substr( $s, $head_length + 1 );
		}

		$lines = preg_split( '~[\r\n]~', $s );
		if ( !is_array( $lines ) ) {
			return $unparsed_head;
		}

		// whatever follows the last line break is not a complete line
		// (it is an empty string when the buffer ends with a line break)
		array_pop( $lines );

		// last line comes first, boundary checks logic based on that
		for ( $n = count( $lines ) - 1; $n >= 0; $n-- ) {
			if ( $lines[$n] !== '' ) {
				$this->push_line( $lines[$n] );
			}
		}

		return $unparsed_head;
	}



	/**
	 * Parses a single access log line and adds it to the current history
	 * slot. Expects to be called with lines ordered from newest to oldest.
	 *
	 * Recognized named elements of the line regexp:
	 *   request_line, date - required
	 *   time_taken_microsecs, time_taken_secs, time_taken_ms - optional
	 *
	 * @param string $line
	 */
	private function push_line( $line ) {
		$e = array();
		preg_match( $this->line_regexp, $line, $e );

		$e = apply_filters( 'w3tc_ustats_access_log_line_elements', $e, $line );
		if ( !isset( $e['request_line'] ) || !isset( $e['date'] ) ) {
			if ( defined( 'W3TC_DEBUG' ) && W3TC_DEBUG ) {
				Util_Debug::log( 'time',
					"line $line cant be parsed using regexp $this->line_regexp, request_line or date elements missing"
				);
			}
			return;
		}

		$time = strtotime( $e['date'] );
		if ( $time === false ) {
			// unparsable date: skip that line only. Comparing false with
			// the timestamps below would stop reading of the whole log
			return;
		}

		// dont read more if we touched entries before timeperiod of collection
		if ( $time <= $this->max_already_counted_timestamp ) {
			$this->more_log_needed = false;
			return;
		}
		// newer than the slot being filled
		if ( $time > $this->history_current_timestamp_end ) {
			return;
		}
		// step back to the slot containing that entry
		while ( $time < $this->history_current_timestamp_start ) {
			if ( $this->history_current_pos <= 0 ) {
				$this->more_log_needed = false;
				return;
			}
			$this->setup_history_item( $this->history_current_pos - 1 );
		}
		// lines arrive newest first, so the first counted one is the maximum
		if ( is_null( $this->max_now_counted_timestamp ) ) {
			$this->max_now_counted_timestamp = $time;
		}

		if ( defined( 'W3TC_DEBUG' ) && W3TC_DEBUG ) {
			if ( $time < $this->min_time ) {
				$this->min_line = $line;
				$this->min_time = $time;
			}
			if ( $time > $this->max_time ) {
				$this->max_line = $line;
				$this->max_time = $time;
			}
		}

		// request line is "METHOD URI PROTOCOL"; malformed requests may
		// have no URI part at all
		$http_request_line_items = explode( ' ', $e['request_line'] );
		$uri = isset( $http_request_line_items[1] ) ? $http_request_line_items[1] : '';

		$time_ms = 0;
		if ( isset( $e['time_taken_microsecs'] ) ) {
			$time_ms = (int)( (float)$e['time_taken_microsecs'] / 1000 );
		} elseif ( isset( $e['time_taken_secs'] ) ) {
			// seconds with fractional part, e.g. nginx $request_time
			$time_ms = (int)round( (float)$e['time_taken_secs'] * 1000 );
		} elseif ( isset( $e['time_taken_ms'] ) ) {
			$time_ms = (int)$e['time_taken_ms'];
		}

		// uri having a file extension other than php is a static request
		$m = null;
		preg_match( '~\\.([a-zA-Z0-9]+)(\?.+)?$~', $uri, $m );
		if ( $m && $m[1] != 'php' ) {
			$this->history_current_item['static_count']++;
			$this->history_current_item['static_timetaken_ms'] += $time_ms;
		} else {
			$this->history_current_item['dynamic_count']++;
			$this->history_current_item['dynamic_timetaken_ms'] += $time_ms;
		}
	}



	/**
	 * Converts apache LogFormat string to a regexp matching a log line.
	 *
	 * default: %h %l %u %t \"%r\" %>s %b
	 * common : %h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"
	 *
	 * Quotes are expected to be backslash-escaped in $format, the way they
	 * are in the examples above. Literal characters of the format are
	 * copied to the regexp as is.
	 *
	 * @param string $format
	 * @return string regexp delimited by ~
	 */
	public function logformat_to_regexp_apache( $format ) {
		// remove modifiers like %>s, %!400,501{User-agent}i
		$format = preg_replace( '~%[<>!0-9,]+([a-zA-Z{])~', '%$1', $format );

		// remove modifiers %{User-agent}^ti, %{User-agent}^to
		// (^ has to be escaped, otherwise it is an anchor and never matches)
		$format = preg_replace( '~%({[^}]+})(\^ti|\^to)~', '%$1z', $format );

		// take all quoted vars
		$format = preg_replace_callback( '~\\\"(%[a-zA-Z%]|%{[^}]+}[a-zA-Z])\\\"~',
			array( $this, 'logformat_to_regexp_apache_element_quoted' ),
			$format );

		// take all remaining vars
		$format = preg_replace_callback( '~(%[a-zA-Z%]|%{[^}]+}[a-zA-Z])~',
			array( $this, 'logformat_to_regexp_apache_element_naked' ),
			$format );

		return '~' . $format . '~';
	}



	/**
	 * preg_replace_callback handler: regexp for a quoted apache variable.
	 *
	 * @param array $match $match[1] is the variable, e.g. %r
	 * @return string
	 */
	public function logformat_to_regexp_apache_element_quoted( $match ) {
		$v = $match[1];

		if ( $v === '%r' ) {
			return '\"(?<request_line>[^"]+)\"';
		}

		// default behavior, expected value doesnt contain quotes
		return '\"([^"]+)\"';
	}



	/**
	 * preg_replace_callback handler: regexp for an unquoted apache variable.
	 *
	 * @param array $match $match[1] is the variable, e.g. %t
	 * @return string
	 */
	public function logformat_to_regexp_apache_element_naked( $match ) {
		$v = $match[1];

		if ( $v === '%t' ) {
			return '\[(?<date>[^\]]+)\]';
		} elseif ( $v === '%D' ) {
			return '(?<time_taken_microsecs>[0-9]+)';
		}

		// default behavior, expected value doesnt contain spaces
		return '([^ ]+)';
	}



	/**
	 * Converts nginx log_format string to a regexp matching a log line.
	 *
	 * default: $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"
	 * w3tc: $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" $request_time
	 *
	 * @param string $format
	 * @return string regexp delimited by ~
	 */
	public function logformat_to_regexp_nginx( $format ) {
		// escape quotes and square brackets
		$format = preg_replace_callback( '~([\"\[\]])~',
			array( $this, 'logformat_to_regexp_nginx_quote' ),
			$format );

		// take all quoted vars
		$format = preg_replace_callback( '~\\\"(\$[a-zA-Z0-9_]+)\\\"~',
			array( $this, 'logformat_to_regexp_nginx_element_quoted' ),
			$format );

		// take all remaining vars
		$format = preg_replace_callback( '~(\$[a-zA-Z0-9_]+)~',
			array( $this, 'logformat_to_regexp_nginx_element_naked' ),
			$format );

		return '~' . $format . '~';
	}



	/**
	 * preg_replace_callback handler: backslash-escapes the matched char.
	 *
	 * @param array $match $match[1] is the character to escape
	 * @return string
	 */
	public function logformat_to_regexp_nginx_quote( $match ) {
		return '\\' . $match[1];
	}



	/**
	 * preg_replace_callback handler: regexp for a quoted nginx variable.
	 *
	 * @param array $match $match[1] is the variable, e.g. $request
	 * @return string
	 */
	public function logformat_to_regexp_nginx_element_quoted( $match ) {
		$v = $match[1];

		if ( $v === '$request' ) {
			return '\"(?<request_line>[^"]+)\"';
		}

		// default behavior, expected value doesnt contain quotes
		return '\"([^"]+)\"';
	}



	/**
	 * preg_replace_callback handler: regexp for an unquoted nginx variable.
	 *
	 * @param array $match $match[1] is the variable, e.g. $time_local
	 * @return string
	 */
	public function logformat_to_regexp_nginx_element_naked( $match ) {
		$v = $match[1];

		if ( $v === '$time_local' ) {
			return '(?<date>[^\]]+)';
		} elseif ( $v === '$request_time' ) {
			// nginx reports $request_time in seconds with milliseconds
			// resolution (e.g. 0.123), so it is captured as seconds and
			// converted to milliseconds by push_line()
			return '(?<time_taken_secs>[0-9.]+)';
		}

		// default behavior, expected value doesnt contain spaces
		return '([^ ]+)';
	}
}
 |