HEX
Server: Apache/2.4.58 (Ubuntu)
System: Linux ns3133907 6.8.0-86-generic #87-Ubuntu SMP PREEMPT_DYNAMIC Mon Sep 22 18:03:36 UTC 2025 x86_64
User: cssnetorguk (1024)
PHP: 8.2.28
Disabled: NONE
Upload Files
File: /home/pbyh.co.uk/public_html/wp-content/plugins/complianz-gdpr/websitescan/class-wsc-scanner.php
<?php
defined('ABSPATH') or die();


if (!class_exists("cmplz_wsc_scanner")) {

	class cmplz_wsc_scanner
	{
		private static $_this;

		const WSC_SCANNER_ENDPOINT = 'https://scan.complianz.io';
		const WSC_SCANNER_DEPTH = 1;
		const WSC_SCANNER_VISIT = 1;
		const WSC_SCANNER_SOURCE = 'complianz-scan';
		const WSC_SCANNER_WEBHOOK_PATH = 'complianz/v1/wsc-scan';

		/**
		 * Class constructor for the WSC scanner class.
		 *
		 * Initializes the WSC scanner class and sets it as a singleton class.
		 */
		function __construct()
		{
			if (isset(self::$_this)) {
				wp_die(sprintf('%s is a singleton class and you cannot create a second instance.',
					get_class($this)));
			}
			self::$_this = $this;
			$this->init_hooks();
		}


		/**
		 * Retrieve the instance of the class.
		 *
		 * @return object The instance of the class.
		 */
		static function this(): object
		{
			return self::$_this;
		}


		/**
		 * Initialize the hooks for the WSC scanner class.
		 *
		 * This function initializes the hooks for the WSC scanner class by adding an action
		 * to the `cmplz_remote_cookie_scan` hook.
		 *
		 * @return void
		 */
		private function init_hooks(): void
		{
			add_action('cmplz_remote_cookie_scan', array($this, 'wsc_scan_process'));
			add_action('admin_init', array($this, 'wsc_scan_init'));
		}


		/**
		 * Check if the WSC scan is enabled.
		 *
		 * This function verifies several conditions to determine if the WSC scan is enabled:
		 * - If the site URL is 'localhost', the scan is disabled.
		 * - If the server address is 'localhost', the scan is disabled.
		 * - If the WSC scan circuit breaker is open, the scan is disabled.
		 * - If the hybrid scan is disabled, the scan is disabled.
		 * - If there is no token, the scan is disabled.
		 * If all conditions are met, the scan is enabled.
		 *
		 * @return bool True if the WSC scan is enabled, false otherwise.
		 */
		public function wsc_scan_enabled(): bool
		{
			// if localhost, return false
			$site_url = site_url();
			$host = parse_url($site_url, PHP_URL_HOST);

			if ($host === 'localhost') {
				return false;
			}

			// if server addr is localhost, return false
			if (!empty($_SERVER['SERVER_ADDR']) && $_SERVER['SERVER_ADDR'] === '127.0.0.1') {
				return false;
			}

			// if the wsc scan is enabled by the user through the APIs settings
			if (get_option('cmplz_wsc_status') !== 'enabled') {
				return false;
			}

			// circuit breaker
			if (!$this->wsc_check_cb()) {
				return false;
			}

			// if no token, return false
			if (!cmplz_wsc_auth::get_token()) {
				return false;
			}

			return true;
		}


		/**
		 * Check if the WSC scan circuit breaker is open.
		 *
		 * This function checks the status of the WSC scan circuit breaker by retrieving the value from the transient cache.
		 * If the value is empty, it retrieves the status from the WSC API and stores it in the transient cache for 5 minutes.
		 *
		 * @return bool True if the WSC scan circuit breaker is open, false otherwise.
		 */
		public function wsc_check_cb(): bool
		{
			$cb = cmplz_get_transient('wsc_scanner_cb_enabled'); // check the status of the cb
			if (empty($cb)) {
				$cb = cmplz_wsc_auth::wsc_api_open('scanner');
				cmplz_set_transient('wsc_scanner_cb_enabled', $cb, 300); // store the status for 5 minutes
			}
			return $cb;
		}


		/**
		 * Start the WSC scan.
		 *
		 * This function initiates the WSC scan process by sending a request to the WSC API.
		 * It retrieves the scan URL, depth, visit, source, and token from the options and sends them in the request body.
		 * If the request is successful, it stores the scan ID, created at, and status in the wp options.
		 *
		 * @return void
		 */
		private function wsc_scan_start(): void
		{
			// retrieve the token
			$token = cmplz_wsc_auth::get_token(true); // get a new token

			if (!$token) {
				cmplz_wsc_logger::log_errors('wsc_scan_start', 'COMPLIANZ: no token');
				return;
			}

			$url = esc_url_raw($this->wsc_scan_get_url());

			$depth = self::WSC_SCANNER_DEPTH;
			$visit = self::WSC_SCANNER_VISIT;
			$source = self::WSC_SCANNER_SOURCE;

			$body = array(
				'url' => $url,
				'visit' => $visit, // max n of links to crawl
				'depth' => $depth, // max depth of the pages to crawl
				'acceptBanner' => 'true', // accept the banner if present
				'getTrackers' => 'true', // enable trackers detect as cookies, webstorage, etc
				'source' => $source, // complianz
				"detectTechnologies" => 'false',
				"detectTcf" => 'false',
				"detectLanguages" => 'false',
				"detectGoogleConsentMode" => 'false',
			);

			// use the webhook only with ssl
			$webhook_endpoint = esc_url_raw(get_rest_url(null, self::WSC_SCANNER_WEBHOOK_PATH));
			$parsed_webhook_endpoint = parse_url($webhook_endpoint);
			if (isset($parsed_webhook_endpoint['scheme']) && $parsed_webhook_endpoint['scheme'] === 'https') {
				$body['webhook'] = $webhook_endpoint;
			}

			$request = wp_remote_post(
				self::WSC_SCANNER_ENDPOINT . '/api/v1/scan',
				array(
					'headers' => array(
						'Content-Type' => 'application/json',
						'Authorization' => 'oauth-' . $token
					),
					'timeout' => 15,
					'sslverify' => true,
					'body' => json_encode($body)
				)
			);

			if (is_wp_error($request)) {
				cmplz_wsc_logger::log_errors('wsc_scan_start', 'COMPLIANZ: scan request failed, error: ' . $request->get_error_message());
				return;
			}

			$response = json_decode(wp_remote_retrieve_body($request));

			if (!isset($response->id)) {
				cmplz_wsc_logger::log_errors('wsc_scan_start', 'COMPLIANZ: no id in response');
				return;
			}

			// use these options for the webhooks
			update_option('cmplz_wsc_scan_id', $response->id, false);
			update_option('cmplz_wsc_scan_createdAt', $response->createdAt, false);
			update_option('cmplz_wsc_scan_status', 'progress', false);
		}


		/**
		 * Process the WSC scan.
		 *
		 * Initiates the WSC scan process by checking if the scan is enabled.
		 * If enabled, it retrieves the scan status and processes the scan accordingly.
		 * The process includes starting the scan, checking the scan status, and retrieving cookies.
		 * Updates the scan status and progress based on the scan results.
		 * Repeats the process until the scan is completed or the maximum number of iterations is reached.
		 * The process is triggered by the `cmplz_remote_cookie_scan` action and stops if the scan is not enabled,
		 * the maximum iterations are reached, or the scan status is 'failed'.
		 *
		 * @return void
		 */
		public function wsc_scan_process(): void
		{
			if (!$this->wsc_scan_enabled()) {
				cmplz_wsc_logger::log_errors('wsc_scan_process', 'COMPLIANZ: wsc scan is not enabled');
				return;
			}

			$status = 'not-started';
			$max_iterations = 25;
			$cookies = [];

			$iteration = (int)get_option("cmplz_wsc_scan_iteration");
			$iteration++;

			// reached the max iterations the scan is completed
			if ($iteration > $max_iterations) {
				update_option('cmplz_wsc_scan_status', 'completed', false);
				update_option('cmplz_wsc_scan_progress', 100);
				return;
			}

			update_option("cmplz_wsc_scan_iteration", $iteration, false);

			// if the scan is not yet started
			if (!get_option('cmplz_wsc_scan_id')) {
				$this->wsc_scan_start(); // start the scan and store the scan id and scan status
				update_option('cmplz_wsc_scan_progress', 25); // set the progress to 25%
			}

			// once we have the scan id, we can check the status
			if (get_option('cmplz_wsc_scan_id') !== false) {
				$sleep = 6;
				sleep($sleep);
				update_option('cmplz_wsc_scan_progress', 25 + $iteration * 5);
				$status = $this->wsc_scan_get_status(); // check the status of the scan
			}

			if ($status === 'completed') { // if the status is completed
				// if is already completed by webhook and progress is 100
				$is_webhook_result = $this->wsc_scan_completed();

				if (!$is_webhook_result) { // if we don't have the results yet from the webhook
					update_option('cmplz_wsc_scan_progress', 100); // set to complete - 100%
					$cookies = $this->wsc_scan_get_cookies(); // get and store the cookies
				}
			}

			// if failed, stop scan and mark as completed for now
			if ($status === 'failed') {
				update_option('cmplz_wsc_scan_status', 'completed', false);
				update_option('cmplz_wsc_scan_progress', 100, false);
			}

			//check if we have results
			if (count($cookies) > 0) {
				// store the cookies
				$this->wsc_scan_store_cookies($cookies);
			}
		}


		/**
		 * Store cookies retrieved from the WSC scan.
		 *
		 * This function processes an array of cookies, filters out non-cookie and non-webStorage types,
		 * and stores the remaining cookies using the CMPLZ_COOKIE class.
		 *
		 * @param array $cookies An array of cookie objects retrieved from the WSC scan.
		 */
		public static function wsc_scan_store_cookies(array $cookies): void
		{
			foreach ($cookies as $key => $c) {
				// Skip if the type is not 'webStorage' or 'cookie'
				if ($c->type !== 'webStorage' && $c->type !== 'cookie') {
					continue;
				}

				$cookie = new CMPLZ_COOKIE();
				// Set the cookie type to 'localstorage' if it's 'webStorage', otherwise 'cookie'
				$cookie->type = $c->type === 'webStorage' ? 'localstorage' : 'cookie';
				// Set the domain to 'self' if it's 'webStorage', otherwise use the cookie's domain
				$cookie->domain = $c->type === 'cookie' ? $c->domain : 'self';
				// Add the cookie name and supported languages to the cookie object
				$cookie->add($c->name, COMPLIANZ::$banner_loader->get_supported_languages());
				// Save the cookie object
				$cookie->save(true);
			}
		}


		/**
		 * Reset the WSC scan options.
		 *
		 * This function deletes the options related to the WSC scan, effectively resetting the scan state.
		 * It removes the scan ID, scan status, scan progress, and scan iteration count from the WordPress options.
		 *
		 * @return void
		 */
		public static function wsc_scan_reset(): void
		{
			delete_option('cmplz_wsc_scan_id');
			delete_option('cmplz_wsc_scan_status');
			delete_option('cmplz_wsc_scan_progress');
			delete_option('cmplz_wsc_scan_iteration');
			delete_option('cmplz_wsc_scan_createdAt');
		}


		/**
		 * Check if the WSC scan is completed.
		 *
		 * This function checks if the WSC scan is enabled and then verifies if the scan progress
		 * has reached 100%, indicating that the scan is completed.
		 *
		 * @return bool True if the WSC scan is completed, false otherwise.
		 */
		public function wsc_scan_completed(): bool
		{
			if (!$this->wsc_scan_enabled()) { // force true
				cmplz_wsc_logger::log_errors('wsc_scan_completed', 'COMPLIANZ: wsc scan not enabled');
				return true;
			}
			return get_option('cmplz_wsc_scan_progress') === 100;
		}


		/**
		 * Get the progress of the WSC scan.
		 *
		 * This function checks if the WSC scan is enabled. If it is not enabled, it returns 100,
		 * indicating that the scan is complete. Otherwise, it retrieves the current scan progress
		 * from the WordPress options.
		 *
		 * @return int The current progress of the WSC scan, or 100 if the scan is not enabled.
		 */
		public function wsc_scan_progress(): int
		{
			if (!$this->wsc_scan_enabled()) {
				return 100;
			}
			return (int)get_option('cmplz_wsc_scan_progress');
		}


		/**
		 * Get the URL for the WSC scan.
		 *
		 * This function returns the URL to be used for the WSC scan. If the `SCRIPT_DEBUG` constant is defined,
		 * it returns the Complianz URL. Otherwise, it returns the site URL.
		 * @return string The URL to be used for the WSC scan.
		 */
		private function wsc_scan_get_url(): string
		{
			return site_url();
		}


		/**
		 * Retrieve the cookies found during the WSC scan.
		 *
		 * This function retrieves the cookies found during the WSC scan by making a request to the WSC API.
		 * It returns an array of cookie objects if the request is successful, or an empty array otherwise.
		 *
		 * @return array An array of cookie objects found during the WSC scan.
		 */
		private function wsc_scan_get_cookies(): array
		{
			$response = $this->wsc_scan_retrieve_scan();
			$cookies = [];

			if (is_wp_error($response)) {
				cmplz_wsc_logger::log_errors('wsc_scan_get_cookies', 'COMPLIANZ: error retrieving scan, error: ' . $response->get_error_message());
				return $cookies;
			}

			$data = json_decode(wp_remote_retrieve_body($response));
			if (isset($data->status) && $data->status === 'completed') {
				if (isset($data->result->trackers)) {
					$cookies = $data->result->trackers;
				}
				update_option('cmplz_wsc_scan_status', 'completed', false);
			}

			return $cookies;
		}


		/**
		 * Retrieve the status of the WSC scan.
		 *
		 * This function retrieves the status of the WSC scan by making a request to the WSC API.
		 * It returns the status if the request is successful, or the default status otherwise.
		 *
		 * @param int $iteration The current iteration of the status retrieval process.
		 * @return string The status of the WSC scan.
		 */
		private function wsc_scan_get_status(int $iteration = 0): string
		{
			$defaultStatus = get_option('cmplz_wsc_scan_status', 'not-started');
			$response = $this->wsc_scan_retrieve_scan(); // array or wp_error

			// Early exit if response is an error or if recurring and iteration is >= 2
			if (is_wp_error($response) || $iteration >= 2) {
				cmplz_wsc_logger::log_errors('wsc_scan_get_status', 'COMPLIANZ: error retrieving scan status');
				return $defaultStatus;
			}

			// Decode the response body
			$data = json_decode(wp_remote_retrieve_body($response));

			// Check if there was an error in the scan process
			if (isset($data->is_processed) && $data->is_processed === 'error' && isset($data->skipped_urls) && is_array($data->skipped_urls)) {
				foreach ($data->skipped_urls as $skipped) {
					if ($skipped->reason === 'PageNotLoadedError' && $skipped->url === $this->wsc_scan_get_url()) {
						cmplz_wsc_logger::log_errors('wsc_scan_get_status', 'COMPLIANZ: error in scan process');
						return 'failed';
					}
				}
			}

			// If an error occurred in the response, restart the scan and retry
			if (isset($data->error)) {
				$this->wsc_scan_start();
				return $this->wsc_scan_get_status($iteration + 1); // Retry with incremented iteration
			}

			// If a status is provided, update it and return
			if (isset($data->status)) {
				update_option('cmplz_wsc_scan_status', $data->status, false);
				return $data->status;
			}

			// Default return
			return $defaultStatus;
		}


		/**
		 * Retrieve the WSC scan.
		 *
		 * This function retrieves the WSC scan data by making a request to the WSC API.
		 * It returns the response if the request is successful, or an empty array otherwise.
		 *
		 * @return array|WP_Error The response from the WSC API.
		 */
		private function wsc_scan_retrieve_scan()
		{
			$id = sanitize_text_field(get_option('cmplz_wsc_scan_id'));

			$endpoint = self::WSC_SCANNER_ENDPOINT . '/api/v1/scans/' . $id;
			$token = cmplz_wsc_auth::get_token();

			return wp_remote_get($endpoint, array(
					'headers' => array(
						'Content-Type' => 'application/json',
						'Authorization' => 'oauth-' . $token
					),
					'timeout' => 15,
					'sslverify' => true
				)
			);
		}

		/**
		 * Initialize the WSC scan.
		 *
		 * This function resets the old scanner to start the WSC scan if the WSC scan status is already enabled.
		 *
		 * @return void
		 */
		public function wsc_scan_init(): void
		{
			if (get_option('cmplz_wsc_status') !== 'enabled') {
				return;
			}

			if (get_option('cmplz_wsc_scan_first_run', false)) {
				return;
			}

			$processed_pages_list = get_transient('cmplz_processed_pages_list');

			if (!is_array($processed_pages_list) || !in_array("remote", $processed_pages_list)) {
				return;
			}

			$processed_pages_list = array_diff($processed_pages_list, ["remote"]);
			set_transient('cmplz_processed_pages_list', $processed_pages_list, MONTH_IN_SECONDS);
			update_option('cmplz_wsc_scan_first_run', true, false);
		}
	}
}