laipower/wp-content/plugins/static-html-output-plugin/plugin/WP2Static/SiteCrawler.php

2 lines
11 KiB
PHP
Raw Normal View History

2020-04-07 13:03:04 +00:00
<?php
/**
 * Drives crawling of the URL lists stored in the uploads directory.
 *
 * Outside WP-CLI, constructing the object immediately runs the crawl step
 * named by the `ajax_action` POST field.
 */
class SiteCrawler extends WP2Static {

    /**
     * Load settings, reset per-crawl state and, outside WP-CLI, dispatch on
     * the `ajax_action` POST field (`crawl_again` or `crawl_site`).
     */
    public function __construct() {
        $this->loadSettings(
            array(
                'wpenv',
                'crawling',
                'processing',
                'advanced',
            )
        );

        if ( isset( $this->settings['crawl_delay'] ) ) {
            // Cast before sleeping: an empty or non-numeric value would make
            // sleep() raise a TypeError on PHP 8 (a warning on PHP 7).
            $crawl_delay = (int) $this->settings['crawl_delay'];

            if ( $crawl_delay > 0 ) {
                sleep( $crawl_delay );
            }
        }

        $this->processed_file = '';
        $this->file_type = '';
        $this->response = '';
        $this->content_type = '';
        $this->url = '';
        $this->full_url = '';
        $this->extension = '';
        $this->archive_dir = '';
        $this->list_of_urls_to_crawl_path = '';
        $this->urls_to_crawl = '';

        if ( defined( 'WP_CLI' ) ) {
            return;
        }

        // The field is absent when the class is constructed outside the
        // crawl AJAX requests; read it defensively to avoid an
        // "undefined index" notice/warning.
        $ajax_action = isset( $_POST['ajax_action'] ) ? $_POST['ajax_action'] : '';

        if ( $ajax_action === 'crawl_again' ) {
            $this->crawl_discovered_links();
        } elseif ( $ajax_action === 'crawl_site' ) {
            $this->crawl_site();
        }
    }

    /**
     * Build the second-pass crawl list from the discovered-URLs file.
     *
     * Writes, inside the uploads directory:
     *  - WP-STATIC-DISCOVERED-URLS-LOG.txt   (sorted, de-duplicated URLs)
     *  - WP-STATIC-DISCOVERED-URLS-TOTAL.txt (their count)
     *  - WP-STATIC-2ND-CRAWL-LIST.txt        (discovered minus already crawled)
     *  - WP-STATIC-FINAL-2ND-CRAWL-LIST.txt  (copy of the previous file)
     *
     * A missing or unreadable input list is treated as empty, so a missing
     * initial crawl list results in every discovered URL being queued.
     */
    public function generate_discovered_links_list() {
        $uploads_path = $this->settings['wp_uploads_path'];
        $read_flags = FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES;

        $second_crawl_file_path = $uploads_path . '/WP-STATIC-2ND-CRAWL-LIST.txt';
        $final_second_crawl_file_path = $uploads_path . '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt';
        $discovered_log_path = $uploads_path . '/WP-STATIC-DISCOVERED-URLS-LOG.txt';
        $discovered_total_path = $uploads_path . '/WP-STATIC-DISCOVERED-URLS-TOTAL.txt';

        // file() returns false on failure; passing that to array_diff()
        // is a TypeError on PHP 8, so fall back to an empty list.
        $already_crawled = array();
        $initial_crawl_list = $uploads_path . '/WP-STATIC-INITIAL-CRAWL-LIST.txt';

        if ( is_file( $initial_crawl_list ) ) {
            $initial_lines = file( $initial_crawl_list, $read_flags );

            if ( $initial_lines !== false ) {
                $already_crawled = $initial_lines;
            }
        }

        $unique_discovered_links = array();
        $discovered_links_file = $uploads_path . '/WP-STATIC-DISCOVERED-URLS.txt';

        if ( is_file( $discovered_links_file ) ) {
            $discovered_links = file( $discovered_links_file, $read_flags );

            // Same guard as above: only de-duplicate a successful read.
            if ( $discovered_links !== false ) {
                $unique_discovered_links = array_unique( $discovered_links );
                sort( $unique_discovered_links );
            }
        }

        file_put_contents(
            $discovered_log_path,
            implode( PHP_EOL, $unique_discovered_links )
        );
        chmod( $discovered_log_path, 0664 );

        file_put_contents(
            $discovered_total_path,
            count( $unique_discovered_links )
        );
        chmod( $discovered_total_path, 0664 );

        $links_still_to_crawl = array_diff(
            $unique_discovered_links,
            $already_crawled
        );

        file_put_contents(
            $second_crawl_file_path,
            implode( PHP_EOL, $links_still_to_crawl )
        );
        chmod( $second_crawl_file_path, 0664 );

        copy( $second_crawl_file_path, $final_second_crawl_file_path );
        chmod( $final_second_crawl_file_path, 0664 );
    }

    /**
     * Crawl the second-pass list, generating it first when
     * WP-STATIC-2ND-CRAWL-LIST.txt does not exist yet.
     *
     * Logs an error and terminates the request when the final list file is
     * missing. When the list is empty, echoes `SUCCESS` (outside WP-CLI
     * only); otherwise hands over to crawlABitMore().
     */
    public function crawl_discovered_links() {
        if ( defined( 'WP_CLI' ) && ! defined( 'CRAWLING_DISCOVERED' ) ) {
            define( 'CRAWLING_DISCOVERED', true );
        }

        $second_crawl_file_path = $this->settings['wp_uploads_path'] .
            '/WP-STATIC-2ND-CRAWL-LIST.txt';

        if ( ! is_file( $second_crawl_file_path ) ) {
            $this->generate_discovered_links_list();
        }

        $this->list_of_urls_to_crawl_path = $this->settings['wp_uploads_path'] .
            '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt';

        if ( ! is_file( $this->list_of_urls_to_crawl_path ) ) {
            require_once dirname( __FILE__ ) . '/../WP2Static/WsLog.php';
            WsLog::l(
                'ERROR: LIST OF URLS TO CRAWL NOT FOUND AT: ' .
                $this->list_of_urls_to_crawl_path
            );
            die();
        }

        if ( filesize( $this->list_of_urls_to_crawl_path ) ) {
            $this->crawlABitMore();
        } elseif ( ! defined( 'WP_CLI' ) ) {
            echo 'SUCCESS';
        }
    }

    /**
     * Crawl the initial list, or continue with the second pass when
     * WP-STATIC-FINAL-2ND-CRAWL-LIST.txt already exists.
     *
     * Logs an error and terminates the request when
     * WP-STATIC-FINAL-CRAWL-LIST.txt is missing. When the list is empty,
     * echoes `SUCCESS` (outside WP-CLI only); otherwise hands over to
     * crawlABitMore().
     */
    public function crawl_site() {
        $this->list_of_urls_to_crawl_path = $this->settings['wp_uploads_path'] .
            '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt';

        // A second-pass list on disk means the first pass already finished.
        if ( is_file( $this->list_of_urls_to_crawl_path ) ) {
            $this->crawl_discovered_links();

            return;
        }

        $this->list_of_urls_to_crawl_path = $this->settings['wp_uploads_path'] .
            '/WP-STATIC-FINAL-CRAWL-LIST.txt';

        if ( ! is_file( $this->list_of_urls_to_crawl_path ) ) {
            require_once dirname( __FILE__ ) . '/../WP2Static/WsLog.php';
            WsLog::l(
                'ERROR: LIST OF URLS TO CRAWL NOT FOUND AT: ' .
                $this->list_of_urls_to_crawl_path
            );
            die();
        }

        if ( filesize( $this->list_of_urls_to_crawl_path ) ) {
            $this->crawlABitMore();
        } elseif ( ! defined( 'WP_CLI' ) ) {
            echo 'SUCCESS';
        }
    }

    public function crawlABitMore() { $batch_of_links_to_crawl = array(); $this->urls_to_crawl = file( $this->list_of_urls_to_crawl_path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES ); $total_links = count( $this->urls_to_crawl ); if ( $total_links < 1 ) { require_once dirname( __FILE__ ) . '/../WP2Static/WsLog.php'; WsLog::l( 'ERROR: L