<?php

namespace CirrusSearch\Search;

use CirrusSearch\Connection;
use CirrusSearch\SearchConfig;
use CirrusSearch\Util;
use Elastica\Index;
use Elastica\Query;
use MediaWiki\Logger\LoggerFactory;

/**
 * Assembles the \Elastica\Search request (body, options and target indices)
 * out of a SearchContext plus the paging, timeout, index and sort settings
 * configured on this builder.
 */
class SearchRequestBuilder {
	/** @var SearchContext source of the query, filters, highlighting, suggesters, ... */
	private $searchContext;

	/** @var Connection used to resolve index names and index objects */
	private $connection;

	/** @var string base name of the index to search */
	private $indexBaseName;

	/** @var int number of leading results to skip */
	private $offset = 0;

	/** @var int maximum number of results to return */
	private $limit = 20;

	/** @var string search timeout expressed as time and unit, e.g. 20s for 20 seconds */
	private $timeout;

	/**
	 * @var Index|null index to search when set; when null the index is resolved by
	 *  {@link getIndex()}, typically through {@link Connection::pickIndexSuffixForNamespaces}
	 */
	private $index;

	/** @var string sort option, decides between rescore functions and an elastic sort */
	private $sort = 'relevance';

	/**
	 * @param SearchContext $searchContext
	 * @param Connection $connection
	 * @param string $indexBaseName
	 */
	public function __construct( SearchContext $searchContext, Connection $connection, $indexBaseName ) {
		$this->indexBaseName = $indexBaseName;
		$this->connection = $connection;
		$this->searchContext = $searchContext;
	}

	/**
	 * Build the search request.
	 *
	 * Note that the 'random' sort without a seed resets the offset of this
	 * builder to 0 and records a warning on the search context.
	 *
	 * @return \Elastica\Search
	 */
	public function build() {
		$context = $this->searchContext;
		$resultsType = $context->getResultsType();

		$query = new Query();
		// Unless a precise total_hits is requested only track offset + limit + 1 hits.
		// This is enough to know whether more results are available on the next page.
		$trackTotalHits = $context->getTrackTotalHits()
			? true
			: $this->offset + $this->limit + 1;
		$query->setTrackTotalHits( $trackTotalHits );
		$query->setSource( $resultsType->getSourceFiltering() );
		$query->setParam( "fields", $resultsType->getFields() );

		$extraIndexes = $context->getExtraIndices();
		$dedupeInQuery = $extraIndexes &&
			$context->getConfig()->getElement( 'CirrusSearchDeduplicateInQuery' ) !== false;
		if ( $dedupeInQuery ) {
			$dupeFilter = new Query\Term( [ 'local_sites_with_dupe' => $this->indexBaseName ] );
			$context->addNotFilter( $dupeFilter );
		}

		$mainQuery = $context->getQuery();
		$query->setQuery( $mainQuery );

		foreach ( $context->getAggregations() as $aggregation ) {
			$query->addAggregation( $aggregation );
		}

		$highlight = $context->getHighlight( $resultsType, $mainQuery );
		if ( $highlight ) {
			$query->setHighlight( $highlight );
		}

		$suggesters = $context->getFallbackRunner()->getElasticSuggesters();
		if ( $suggesters ) {
			// TODO: remove the special case on 1-element arrays, it only exists to keep
			// the test fixtures unchanged. We should switch to explicit naming.
			$suggest = count( $suggesters ) === 1 ? reset( $suggesters ) : $suggesters;
			$query->setParam( 'suggest', [ 'suggest' => $suggest ] );
			$query->addParam( 'stats', 'suggest' );
		}

		foreach ( $context->getSyntaxUsed() as $syntaxName ) {
			$query->addParam( 'stats', $syntaxName );
		}

		// Must run before paging is applied: the 'random' sort may reset the offset.
		$this->applySort( $query, $mainQuery );

		if ( $this->offset ) {
			$query->setFrom( $this->offset );
		}
		if ( $this->limit ) {
			$query->setSize( $this->limit );
		}

		// Request level options
		$queryOptions = [];
		if ( $this->timeout ) {
			$queryOptions[\Elastica\Search::OPTION_TIMEOUT] = $this->timeout;
		}
		// @todo when switching to multi-search this has to be provided at the top level
		if ( $context->getConfig()->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
			$queryOptions[\Elastica\Search::OPTION_SEARCH_TYPE] = \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$search = $this->getIndex()->createSearch( $query, $queryOptions );
		$crossClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
		foreach ( $extraIndexes as $extraIndex ) {
			$extraIndexName = $extraIndex->getSearchIndex( $crossClusterName );
			$search->addIndex( $this->connection->getIndex( $extraIndexName ) );
		}

		$context->getDebugOptions()->applyDebugOptions( $query );
		return $search;
	}

	/**
	 * Apply the configured sort option to the request body: either rescores,
	 * an elastic sort, or a random scoring wrapper around the main query.
	 *
	 * @param Query $query request body being built, modified in place
	 * @param Query\AbstractQuery $mainQuery the query obtained from the search context
	 */
	private function applySort( Query $query, $mainQuery ): void {
		// See also CirrusSearch::getValidSorts()
		switch ( $this->sort ) {
			case 'just_match':
				// Plain matching scores: no rescoring and the default sort.
				break;
			case 'relevance':
				// The default: rescores are added to improve relevance
				$rescores = $this->searchContext->getRescore();
				if ( $rescores !== [] ) {
					$query->setParam( 'rescore', $rescores );
				}
				break;
			case 'create_timestamp_asc':
				$query->setSort( [ 'create_timestamp' => 'asc' ] );
				break;
			case 'create_timestamp_desc':
				$query->setSort( [ 'create_timestamp' => 'desc' ] );
				break;
			case 'last_edit_asc':
				$query->setSort( [ 'timestamp' => 'asc' ] );
				break;
			case 'last_edit_desc':
				$query->setSort( [ 'timestamp' => 'desc' ] );
				break;
			case 'incoming_links_asc':
				$query->setSort( [
					'incoming_links' => [
						'order' => 'asc',
						'missing' => '_first',
					],
				] );
				break;
			case 'incoming_links_desc':
				$query->setSort( [
					'incoming_links' => [
						'order' => 'desc',
						'missing' => '_last',
					],
				] );
				break;
			case 'none':
				// Documents come back in index order
				$query->setSort( [ '_doc' ] );
				break;
			case 'random':
				$seed = $this->searchContext->getSearchQuery()->getRandomSeed();
				if ( $seed === null && $this->offset !== 0 ) {
					// Without a seed the offset is dropped and the caller is warned.
					$this->searchContext->addWarning( 'cirrussearch-offset-not-allowed-with-random-sort' );
					$this->offset = 0;
				}
				if ( $seed === null ) {
					// An empty array can't be used, it would JSONify to [] instead of {}.
					$scoreParams = (object)[];
				} else {
					$scoreParams = [ 'seed' => $seed, 'field' => '_seq_no' ];
				}
				$query->setQuery( $this->wrapWithRandomScore( $mainQuery, $scoreParams ) );
				break;
			case 'user_random':
				// Random order, but stable for a single user
				$query->setQuery( $this->wrapWithRandomScore( $mainQuery, [
					'seed' => Util::generateIdentToken(),
					'field' => '_seq_no',
				] ) );
				break;
			case 'title_natural_asc':
			case 'title_natural_desc':
				if ( $this->searchContext->getConfig()->getElement( 'CirrusSearchNaturalTitleSort', 'use' ) ) {
					// The third '_' separated token of the sort name is the direction
					$direction = explode( '_', $this->sort, 3 )[2];
					$query->setSort( [ 'title.natural_sort' => $direction ] );
					break;
				}
				// Intentional fall-through to the default error case when the
				// natural title sort is not enabled.

			default:
				// Behaves like just_match. No user warning is emitted: an invalid
				// sort getting this far is a bug in the calling code, which should
				// be validating its input.
				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
					"Invalid sort type: {sort}",
					[ 'sort' => $this->sort ]
				);
		}
	}

	/**
	 * Wrap the main query so that matching documents receive a random score.
	 *
	 * Instead of setting a sort field the whole query becomes the filter of a
	 * bool query whose must clause provides the random score. This could
	 * alternatively be a rescore over a limited document set, but in basic
	 * testing the filter was more performant than an 8k rescore window even
	 * with 50M total hits.
	 *
	 * @param Query\AbstractQuery $mainQuery query to use as the filter
	 * @param array|\stdClass $scoreParams parameters of the random_score function
	 * @return Query\BoolQuery
	 */
	private function wrapWithRandomScore( $mainQuery, $scoreParams ) {
		$randomScore = ( new Query\FunctionScore() )
			->setQuery( new Query\MatchAll() )
			->addFunction( 'random_score', $scoreParams );

		return ( new Query\BoolQuery() )
			->addFilter( $mainQuery )
			->addMust( $randomScore );
	}

	/**
	 * @return int number of leading results to skip
	 */
	public function getOffset() {
		return $this->offset;
	}

	/**
	 * @param int $offset number of leading results to skip
	 * @return self
	 */
	public function setOffset( $offset ) {
		$this->offset = $offset;
		return $this;
	}

	/**
	 * @return int maximum number of results to return
	 */
	public function getLimit() {
		return $this->limit;
	}

	/**
	 * @param int $limit maximum number of results to return
	 * @return self
	 */
	public function setLimit( $limit ) {
		$this->limit = $limit;
		return $this;
	}

	/**
	 * @return string search timeout with its unit, e.g. 20s
	 */
	public function getTimeout() {
		return $this->timeout;
	}

	/**
	 * @param string $timeout search timeout with its unit, e.g. 20s
	 * @return self
	 */
	public function setTimeout( $timeout ) {
		$this->timeout = $timeout;
		return $this;
	}

	/**
	 * Resolve the index to search.
	 *
	 * The index forced through {@link setIndex()} wins. Otherwise the index name
	 * is inferred from the concrete namespace map when possible, else picked by
	 * the connection from the searched namespaces, and finally prefixed with the
	 * cross cluster name when the target cluster differs from the host wiki one.
	 *
	 * @return \Elastica\Index An elastica type suitable for searching against
	 *  the configured wiki over the host wiki's default connection.
	 */
	public function getIndex(): \Elastica\Index {
		if ( $this->index ) {
			return $this->index;
		}

		$config = $this->searchContext->getConfig();
		$hostConfig = $config->getHostWikiConfig();

		$indexName = $this->inferIndexFromConcreteNamespaceMap( $config )
			?? $this->connection->getIndexName(
				$this->indexBaseName,
				$this->connection->pickIndexSuffixForNamespaces( $this->searchContext->getNamespaces() )
			);

		if ( $hostConfig->get( 'CirrusSearchCrossClusterSearch' ) ) {
			$hostCluster = $hostConfig->getClusterAssignment()->getCrossClusterName();
			$targetCluster = $config->getClusterAssignment()->getCrossClusterName();
			if ( $hostCluster !== $targetCluster ) {
				$indexName = $targetCluster . ':' . $indexName;
			}
		}

		return $this->connection->getIndex( $indexName );
	}

	/**
	 * Try to derive the index from the concrete namespace map.
	 *
	 * Mainly useful for crossproject searches, where the concrete namespace map
	 * is provided by the config dump API. Namespaces unknown to the host wiki
	 * might be queried there, so the connection can't be used to pick the index
	 * suffix and the concrete namespace map is used instead.
	 *
	 * @param SearchConfig $config
	 * @return string|null null when the concrete namespace map is not available,
	 *  does not know all the searched namespaces, or when more than one index
	 *  would be required; the host wiki connection then picks the right index.
	 */
	private function inferIndexFromConcreteNamespaceMap( SearchConfig $config ): ?string {
		$namespaces = $this->searchContext->getNamespaces();
		if ( !$namespaces || !$config->has( 'CirrusSearchConcreteNamespaceMap' ) ) {
			return null;
		}

		// Connection::pickIndexSuffixForNamespaces() is bypassed here: the connection
		// is built against the host wiki config while the concrete namespace map is
		// likely obtained for the target wiki.
		$namespaceMap = $config->get( 'CirrusSearchConcreteNamespaceMap' );
		$candidates = [];
		$hasUnknownNamespace = false;
		foreach ( $namespaces as $namespace ) {
			if ( isset( $namespaceMap[$namespace] ) ) {
				$candidates[$namespaceMap[$namespace]] = $namespaceMap[$namespace];
			} else {
				// Something's odd: the map does not know this namespace, so the
				// target wiki config can't be trusted to pick the index.
				$hasUnknownNamespace = true;
			}
		}

		// The concrete namespace map holds full index names ($basename_$suffix).
		// A single candidate is targeted directly, anything else is left to the
		// caller which targets the main alias.
		if ( $hasUnknownNamespace || count( $candidates ) !== 1 ) {
			return null;
		}

		return $this->connection->getIndexName( reset( $candidates ) );
	}

	/**
	 * @param ?Index $index index to force, or null to let {@link getIndex()} resolve it
	 * @return $this
	 */
	public function setIndex( ?Index $index ): self {
		$this->index = $index;
		return $this;
	}

	/**
	 * @return string the sort option
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * @param string $sort the sort option
	 * @return self
	 */
	public function setSort( $sort ) {
		$this->sort = $sort;
		return $this;
	}

	/**
	 * @return SearchContext
	 */
	public function getSearchContext() {
		return $this->searchContext;
	}
}
