/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.ml.common.indexInsight;

import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import lombok.Generated;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.action.admin.indices.mapping.get.GetMappingsRequest;
import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.cluster.metadata.MappingMetadata;
import org.opensearch.core.action.ActionListener;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.ml.common.indexInsight.AbstractIndexInsightTask;
import org.opensearch.ml.common.indexInsight.IndexInsight;
import org.opensearch.ml.common.indexInsight.IndexInsightTask;
import org.opensearch.ml.common.indexInsight.IndexInsightTaskStatus;
import org.opensearch.ml.common.indexInsight.MLIndexInsightType;
import org.opensearch.ml.common.utils.StringUtils;
import org.opensearch.ml.common.utils.mergeMetaDataUtils.MergeRuleHelper;
import org.opensearch.ml.repackage.com.google.common.annotations.VisibleForTesting;
import org.opensearch.remote.metadata.client.SdkClient;
import org.opensearch.search.SearchHit;
import org.opensearch.search.aggregations.Aggregation;
import org.opensearch.search.aggregations.AggregationBuilder;
import org.opensearch.search.aggregations.AggregationBuilders;
import org.opensearch.search.aggregations.AggregatorFactories;
import org.opensearch.search.aggregations.bucket.filter.FiltersAggregationBuilder;
import org.opensearch.search.aggregations.bucket.filter.FiltersAggregator;
import org.opensearch.search.aggregations.bucket.filter.InternalFilters;
import org.opensearch.search.aggregations.bucket.sampler.InternalSampler;
import org.opensearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.opensearch.search.aggregations.metrics.CardinalityAggregationBuilder;
import org.opensearch.search.aggregations.metrics.InternalTopHits;
import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.opensearch.search.aggregations.metrics.MinAggregationBuilder;
import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder;
import org.opensearch.search.builder.SearchSourceBuilder;
import org.opensearch.search.sort.SortOrder;
import org.opensearch.transport.client.Client;

/**
 * Index-insight task that samples an index and summarises its data distribution.
 *
 * <p>The task reads the index mappings, runs a single sampler aggregation that collects
 * (per supported field) the top terms, the cardinality and the min/max value, a per-field
 * "exists" count and a few example documents. When more than {@link #FILTER_LLM_NUMBERS}
 * columns survive the sparsity filter, an LLM agent is asked to pick the most important
 * ones. The resulting JSON is either persisted through {@code saveResult} or returned
 * directly to the caller.
 */
public class StatisticalDataTask extends AbstractIndexInsightTask {
    @Generated
    private static final Logger log = LogManager.getLogger(StatisticalDataTask.class);

    /** Number of buckets requested from each per-field terms aggregation. */
    private static final int TERM_SIZE = 5;
    /** Number of example documents requested from the top-hits aggregation. */
    private static final int EXAMPLE_DOC_SIZE = 3;
    /** Per-shard sample size of the enclosing sampler aggregation. */
    private static final int SAMPLE_NUMBER = 100000;
    /** Column count above which the LLM is asked to select the important columns. */
    private static final int FILTER_LLM_NUMBERS = 30;
    /** Minimum fraction of sampled documents that must contain a field for it to be kept. */
    private static final double HIGH_PRIORITY_COLUMN_THRESHOLD = 0.001;

    public static final String NOT_NULL_KEYWORD = "not_null";
    public static final String IMPORTANT_COLUMN_KEYWORD = "important_column_and_distribution";
    public static final String EXAMPLE_DOC_KEYWORD = "example_docs";

    private static final String SAMPLE_AGG_NAME = "sample";
    /** Suffix appended to a field name to build its key inside the "not_null" filters aggregation. */
    private static final String NOT_NULL_SUFFIX = "_" + NOT_NULL_KEYWORD;
    private static final String UNIQUE_TERM_PREFIX = "unique_terms_";
    private static final String MAX_VALUE_PREFIX = "max_value_";
    private static final String MIN_VALUE_PREFIX = "min_value_";
    private static final String UNIQUE_COUNT_PREFIX = "unique_count_";
    private static final List<String> PREFIXES = List.of(UNIQUE_TERM_PREFIX, UNIQUE_COUNT_PREFIX, MAX_VALUE_PREFIX, MIN_VALUE_PREFIX);
    /** Mapping types that get a terms + cardinality aggregation. */
    private static final List<String> UNIQUE_TERMS_LIST = List.of("text", "keyword", "integer", "long", "short");
    /** Mapping types that get a min + max aggregation. */
    private static final List<String> MIN_MAX_LIST = List.of("integer", "long", "float", "double", "short", "date");

    private static final String PARSE_COLUMN_NAME_PATTERN = "<column_name>(.*?)</column_name>";
    /** Compiled once; {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern COLUMN_NAME_PATTERN = Pattern.compile(PARSE_COLUMN_NAME_PATTERN);

    private static final String PROMPT_TEMPLATE =
        "Now I will give you the sample examples and some field's data distribution of one Opensearch index.\n"
            + "You should help me filter at most %s important columns.\n"
            + "For logs/trace/metric related indices, make sure you contain error/http response/time/latency/metric related columns.\n"
            + "You should contain your response column name inside tag <column_name></column_name>\n"
            + "Here is the information of sample examples and some field's data distribution.\n\n"
            + "IndexName: %s\n"
            + "detailed information: %s\n";

    /**
     * Creates a statistical-data task for one index.
     *
     * @param sourceIndex name of the index to analyse
     * @param client      transport client used for mapping, search and agent calls
     * @param sdkClient   client handed to the base task
     */
    public StatisticalDataTask(String sourceIndex, Client client, SdkClient sdkClient) {
        super(MLIndexInsightType.STATISTICAL_DATA, sourceIndex, client, sdkClient);
    }

    /**
     * Runs the task and stores the result.
     *
     * @param tenantId tenant on whose behalf the task runs
     * @param listener receives the produced insight or the failure
     */
    @Override
    public void runTask(String tenantId, ActionListener<IndexInsight> listener) {
        this.runTask(tenantId, listener, true);
    }

    /**
     * Runs the task, optionally skipping persistence.
     *
     * @param tenantId    tenant on whose behalf the task runs
     * @param listener    receives the produced insight or the failure
     * @param shouldStore {@code true} to persist through {@code saveResult}; {@code false} to
     *                    hand a freshly built {@link IndexInsight} straight to the listener
     */
    public void runTask(String tenantId, ActionListener<IndexInsight> listener, boolean shouldStore) {
        try {
            this.collectStatisticalData(tenantId, shouldStore, listener);
        } catch (Exception e) {
            this.handleError("Failed to execute statistical data task for index {}", e, tenantId, listener, shouldStore);
        }
    }

    /** Re-computes the statistics for this index without storing them; the pattern document itself is not used. */
    @Override
    protected void handlePatternMatchedDoc(Map<String, Object> patternSource, String tenantId, ActionListener<IndexInsight> listener) {
        this.runTask(tenantId, listener, false);
    }

    /** @return an empty list; this task does not depend on other insight types */
    @Override
    public List<MLIndexInsightType> getPrerequisites() {
        return Collections.emptyList();
    }

    /**
     * Always fails because this task declares no prerequisites.
     *
     * @throws IllegalStateException always
     */
    @Override
    public IndexInsightTask createPrerequisiteTask(MLIndexInsightType prerequisiteType) {
        throw new IllegalStateException("StatisticalDataTask has no prerequisites");
    }

    /**
     * Fetches the mappings, runs the sampling query, optionally asks the LLM to narrow the
     * column set, and finally delivers the JSON content.
     */
    @SuppressWarnings("unchecked")
    private void collectStatisticalData(String tenantId, boolean shouldStore, ActionListener<IndexInsight> listener) {
        GetMappingsRequest getMappingsRequest = new GetMappingsRequest().indices(this.sourceIndex);
        this.client.admin().indices().getMappings(getMappingsRequest, ActionListener.wrap(getMappingsResponse -> {
            Map<String, MappingMetadata> mappings = getMappingsResponse.getMappings();
            if (mappings.isEmpty()) {
                listener.onFailure(new IllegalArgumentException("No matching mapping with index name: " + this.sourceIndex));
                return;
            }

            // sourceIndex may resolve to several concrete indices; merge all their field trees.
            Map<String, Object> allFields = new HashMap<>();
            for (MappingMetadata mappingMetadata : mappings.values()) {
                Map<String, Object> mappingSource = (Map<String, Object>) mappingMetadata.getSourceAsMap().get("properties");
                MergeRuleHelper.merge(mappingSource, allFields);
            }
            Map<String, String> fieldsToType = new HashMap<>();
            extractFieldNamesTypes(allFields, fieldsToType, "", false);

            SearchRequest searchRequest = new SearchRequest(this.sourceIndex);
            searchRequest.source(this.buildQuery(fieldsToType));
            this.client.search(searchRequest, ActionListener.wrap(searchResponse -> {
                Set<String> highPriorityColumns = this.filterColumns(fieldsToType, searchResponse);
                Map<String, Object> parsedResult = this.parseSearchResult(fieldsToType, highPriorityColumns, searchResponse);
                this.filterImportantColumnByLLM(parsedResult, tenantId, ActionListener.wrap(selectedColumns -> {
                    String statisticalContent = this.buildStatisticalContent(parsedResult, selectedColumns);
                    if (shouldStore) {
                        this.saveResult(statisticalContent, tenantId, listener);
                    } else {
                        IndexInsight insight = IndexInsight
                            .builder()
                            .index(this.sourceIndex)
                            .taskType(this.taskType)
                            .content(statisticalContent)
                            .status(IndexInsightTaskStatus.COMPLETED)
                            .lastUpdatedTime(Instant.now())
                            .build();
                        listener.onResponse(insight);
                    }
                }, listener::onFailure));
            }, e -> this.handleError("Failed to collect statistical data for index: {}", e, tenantId, listener, shouldStore)));
        }, listener::onFailure));
    }

    /**
     * Restricts the parsed result to the selected columns and serialises it to JSON.
     *
     * @param parsedResult    output of {@link #parseSearchResult}
     * @param selectedColumns columns to keep; an empty list keeps everything
     * @return JSON with the example documents and the per-column distribution
     */
    @SuppressWarnings("unchecked")
    private String buildStatisticalContent(Map<String, Object> parsedResult, List<String> selectedColumns) {
        Map<String, Object> filteredResponse = new HashMap<>();
        List<Map<String, Object>> exampleDocs = (List<Map<String, Object>>) parsedResult.get(EXAMPLE_DOC_KEYWORD);
        filteredResponse.put(EXAMPLE_DOC_KEYWORD, this.filterSampleColumns(exampleDocs, selectedColumns));

        Map<String, Object> importantColumns = (Map<String, Object>) parsedResult.get(IMPORTANT_COLUMN_KEYWORD);
        Set<String> selected = new HashSet<>(selectedColumns);
        Map<String, Object> filteredImportantColumns = importantColumns
            .entrySet()
            .stream()
            .filter(entry -> selected.isEmpty() || selected.contains(entry.getKey()))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        filteredResponse.put(IMPORTANT_COLUMN_KEYWORD, filteredImportantColumns);
        return StringUtils.gson.toJson(filteredResponse);
    }

    /**
     * Builds the sampling query for the given fields.
     *
     * <p>All aggregations are nested under one sampler aggregation named {@code "sample"}:
     * terms + cardinality for the types in {@code UNIQUE_TERMS_LIST}, min + max for the types in
     * {@code MIN_MAX_LIST}, a top-hits aggregation for example documents, and a keyed filters
     * aggregation with one {@code exists} filter per field.
     *
     * @param fields flattened field name to mapping type
     * @return a search source with {@code size(0)} carrying only the aggregation tree
     */
    public SearchSourceBuilder buildQuery(Map<String, String> fields) {
        AggregatorFactories.Builder subAggs = new AggregatorFactories.Builder();
        List<FiltersAggregator.KeyedFilter> keyedFilters = new ArrayList<>(fields.size());

        for (Map.Entry<String, String> field : fields.entrySet()) {
            String name = field.getKey();
            String type = field.getValue();
            // Text fields are aggregated through their ".keyword" sub-field.
            // NOTE(review): this assumes such a sub-field exists in the mapping; confirm.
            String fieldUsed = "text".equals(type) ? name + ".keyword" : name;

            if (UNIQUE_TERMS_LIST.contains(type)) {
                TermsAggregationBuilder termsAgg = AggregationBuilders.terms(UNIQUE_TERM_PREFIX + name).field(fieldUsed).size(TERM_SIZE);
                CardinalityAggregationBuilder countAgg = AggregationBuilders.cardinality(UNIQUE_COUNT_PREFIX + name).field(fieldUsed);
                subAggs.addAggregator(termsAgg);
                subAggs.addAggregator(countAgg);
            }
            if (MIN_MAX_LIST.contains(type)) {
                MinAggregationBuilder minAgg = AggregationBuilders.min(MIN_VALUE_PREFIX + name).field(fieldUsed);
                MaxAggregationBuilder maxAgg = AggregationBuilders.max(MAX_VALUE_PREFIX + name).field(fieldUsed);
                subAggs.addAggregator(minAgg);
                subAggs.addAggregator(maxAgg);
            }
            keyedFilters.add(new FiltersAggregator.KeyedFilter(name + NOT_NULL_SUFFIX, QueryBuilders.existsQuery(name)));
        }

        TopHitsAggregationBuilder topHitsAgg = AggregationBuilders.topHits(EXAMPLE_DOC_KEYWORD).size(EXAMPLE_DOC_SIZE);
        subAggs.addAggregator(topHitsAgg);

        FiltersAggregationBuilder nonNullAgg = AggregationBuilders
            .filters(NOT_NULL_KEYWORD, keyedFilters.toArray(new FiltersAggregator.KeyedFilter[0]));
        subAggs.addAggregator(nonNullAgg);

        SamplerAggregationBuilder samplerAgg = AggregationBuilders.sampler(SAMPLE_AGG_NAME).shardSize(SAMPLE_NUMBER).subAggregations(subAggs);
        return new SearchSourceBuilder()
            .query(QueryBuilders.matchAllQuery())
            .sort("_doc", SortOrder.DESC)
            .size(0)
            .aggregation(samplerAgg);
    }

    /**
     * Asks the LLM agent to choose the important columns when there are too many of them.
     *
     * <p>The listener always receives a response: an empty list means "keep every column".
     * That is the answer when the column count is within {@link #FILTER_LLM_NUMBERS} and also
     * the fallback when the agent lookup or the LLM call fails.
     */
    @SuppressWarnings("unchecked")
    private void filterImportantColumnByLLM(Map<String, Object> parsedResult, String tenantId, ActionListener<List<String>> listener) {
        Map<String, Object> importantColumns = (Map<String, Object>) parsedResult.get(IMPORTANT_COLUMN_KEYWORD);
        if (importantColumns == null || importantColumns.size() <= FILTER_LLM_NUMBERS) {
            listener.onResponse(new ArrayList<>());
            return;
        }
        String prompt = this.generateFilterColumnPrompt(parsedResult);
        getAgentIdToRun(
            this.client,
            tenantId,
            ActionListener
                .wrap(
                    agentId -> callLLMWithAgent(
                        this.client,
                        agentId,
                        prompt,
                        tenantId,
                        ActionListener.wrap(response -> listener.onResponse(this.parseLLMFilteredResult(response)), e -> {
                            // Best effort: fall back to the unfiltered column set, but leave a trace.
                            log.warn("Failed to filter important columns by LLM for index {}, keeping all columns", this.sourceIndex, e);
                            listener.onResponse(new ArrayList<>());
                        })
                    ),
                    e -> {
                        log.warn("Failed to resolve agent for column filtering for index {}, keeping all columns", this.sourceIndex, e);
                        listener.onResponse(new ArrayList<>());
                    }
                )
        );
    }

    /** Fills {@code PROMPT_TEMPLATE} with the column limit, the index name and the JSON-encoded statistics. */
    private String generateFilterColumnPrompt(Map<String, Object> parsedResult) {
        return String.format(PROMPT_TEMPLATE, FILTER_LLM_NUMBERS, this.sourceIndex, StringUtils.gson.toJson(parsedResult));
    }

    /**
     * Projects every example document onto the target columns.
     *
     * @param originalDocs  example documents as nested maps
     * @param targetColumns dot-separated column paths to keep; an empty list keeps the documents untouched
     * @return the projected documents, or {@code originalDocs} itself when there is nothing to filter
     */
    @VisibleForTesting
    List<Map<String, Object>> filterSampleColumns(List<Map<String, Object>> originalDocs, List<String> targetColumns) {
        if (originalDocs == null || targetColumns.isEmpty()) {
            return originalDocs;
        }
        // A set gives constant-time lookups while walking every key of every document.
        Set<String> targets = new HashSet<>(targetColumns);
        List<Map<String, Object>> results = new ArrayList<>(originalDocs.size());
        for (Map<String, Object> originalDoc : originalDocs) {
            results.add(this.constructFilterMap("", originalDoc, targets));
        }
        return results;
    }

    /**
     * Recursively copies the parts of {@code currentNode} whose dotted path is a target column.
     *
     * <p>A matching key is copied with its whole value. Otherwise nested maps, and lists whose
     * first element is a map, are descended into and kept only if something inside matched.
     *
     * @param prefix        dotted path of {@code currentNode}, empty at the document root
     * @param currentNode   map being filtered
     * @param targetColumns dotted paths to keep
     * @return a new map holding only the matched branches; possibly empty
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> constructFilterMap(String prefix, Map<String, Object> currentNode, Set<String> targetColumns) {
        Map<String, Object> filterResult = new HashMap<>();
        for (Map.Entry<String, Object> entry : currentNode.entrySet()) {
            String currentKey = prefix.isEmpty() ? entry.getKey() : prefix + "." + entry.getKey();
            Object currentValue = entry.getValue();

            if (targetColumns.contains(currentKey)) {
                filterResult.put(entry.getKey(), currentValue);
                continue;
            }
            if (currentValue instanceof Map) {
                Map<String, Object> child = this.constructFilterMap(currentKey, (Map<String, Object>) currentValue, targetColumns);
                if (!child.isEmpty()) {
                    filterResult.put(entry.getKey(), child);
                }
                continue;
            }
            if (!(currentValue instanceof List)) {
                continue;
            }
            List<?> list = (List<?>) currentValue;
            if (list.isEmpty() || !(list.get(0) instanceof Map)) {
                continue;
            }
            List<Map<String, Object>> keptItems = new ArrayList<>();
            for (Object item : list) {
                // Only the first element was type-checked above; skip stray non-object
                // elements instead of failing the whole task with a ClassCastException.
                if (!(item instanceof Map)) {
                    continue;
                }
                Map<String, Object> child = this.constructFilterMap(currentKey, (Map<String, Object>) item, targetColumns);
                if (!child.isEmpty()) {
                    keptItems.add(child);
                }
            }
            if (!keptItems.isEmpty()) {
                filterResult.put(entry.getKey(), keptItems);
            }
        }
        return filterResult;
    }

    /**
     * Extracts every {@code <column_name>...</column_name>} value from the LLM answer.
     *
     * @param LLMResponse raw LLM text; may be {@code null}
     * @return trimmed column names in order of appearance; empty when none are found
     */
    private List<String> parseLLMFilteredResult(String LLMResponse) {
        List<String> columns = new ArrayList<>();
        if (LLMResponse == null) {
            return columns;
        }
        Matcher matcher = COLUMN_NAME_PATTERN.matcher(LLMResponse);
        while (matcher.find()) {
            columns.add(matcher.group(1).trim());
        }
        return columns;
    }

    /**
     * Converts the sampler aggregation response into the result structure.
     *
     * @param allFieldsToType flattened field name to mapping type
     * @param filteredNames   fields that passed {@link #filterColumns}
     * @param searchResponse  response of the query built by {@link #buildQuery}
     * @return ordered map with {@link #EXAMPLE_DOC_KEYWORD} (only when the top-hits aggregation
     *         is present) and {@link #IMPORTANT_COLUMN_KEYWORD}, the latter mapping each kept field
     *         to its type and aggregation values
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> parseSearchResult(Map<String, String> allFieldsToType, Set<String> filteredNames, SearchResponse searchResponse) {
        InternalSampler sampleAggregation = (InternalSampler) searchResponse.getAggregations().getAsMap().get(SAMPLE_AGG_NAME);
        Map<String, Aggregation> aggregationMap = sampleAggregation.getAggregations().getAsMap();

        Map<String, Map<String, Object>> importantColumns = new LinkedHashMap<>();
        List<Map<String, Object>> exampleDocs = null;

        for (Map.Entry<String, Aggregation> entry : aggregationMap.entrySet()) {
            String key = entry.getKey();
            Aggregation aggregation = entry.getValue();

            if (EXAMPLE_DOC_KEYWORD.equals(key)) {
                SearchHit[] hits = ((InternalTopHits) aggregation).getHits().getHits();
                exampleDocs = new ArrayList<>(hits.length);
                for (SearchHit hit : hits) {
                    exampleDocs.add(hit.getSourceAsMap());
                }
                continue;
            }

            // The known prefixes are mutually exclusive, so at most one can match a key.
            String prefix = matchingPrefix(key);
            if (prefix == null) {
                continue;
            }
            String targetField = key.substring(prefix.length());
            if (!filteredNames.contains(targetField)) {
                continue;
            }
            // Prefix without its trailing underscore, e.g. "unique_terms".
            String aggregationType = prefix.substring(0, prefix.length() - 1);

            try {
                Map<String, Object> aggregationResult = StringUtils.gson.fromJson(aggregation.toString(), Map.class);
                Map<String, Object> aggResult = (Map<String, Object>) aggregationResult.get(key);
                if (aggResult == null) {
                    // Nothing reported for this aggregation; leave the field without this entry.
                    continue;
                }
                Object targetValue;
                if (UNIQUE_TERM_PREFIX.equals(prefix)) {
                    List<Map<String, Object>> buckets = (List<Map<String, Object>>) aggResult.get("buckets");
                    if (buckets == null) {
                        continue;
                    }
                    targetValue = buckets.stream().filter(bucket -> bucket != null).map(bucket -> bucket.get("key")).toList();
                } else {
                    // Prefer the formatted value (e.g. for dates) when the aggregation provides one.
                    targetValue = aggResult.containsKey("value_as_string") ? aggResult.get("value_as_string") : aggResult.get("value");
                }
                importantColumns.computeIfAbsent(targetField, fieldName -> {
                    Map<String, Object> fieldStats = new HashMap<>();
                    fieldStats.put("type", allFieldsToType.get(fieldName));
                    return fieldStats;
                }).put(aggregationType, targetValue);
            } catch (Exception e) {
                // One unparsable aggregation must not discard the statistics of the other fields.
                log.error("Failed to parse aggregation result from DSL in statistical index insight for index name: {}", this.sourceIndex, e);
            }
        }

        Map<String, Object> finalResult = new LinkedHashMap<>();
        if (exampleDocs != null) {
            finalResult.put(EXAMPLE_DOC_KEYWORD, exampleDocs);
        }
        finalResult.put(IMPORTANT_COLUMN_KEYWORD, importantColumns);
        return finalResult;
    }

    /** Returns the entry of {@code PREFIXES} that {@code aggregationName} starts with, or {@code null} if none does. */
    private static String matchingPrefix(String aggregationName) {
        for (String prefix : PREFIXES) {
            if (aggregationName.startsWith(prefix)) {
                return prefix;
            }
        }
        return null;
    }

    /**
     * Selects the fields that are present in enough of the sampled documents.
     *
     * <p>A field is kept when its "exists" bucket count is strictly greater than
     * {@code HIGH_PRIORITY_COLUMN_THRESHOLD} times the sampler's document count and the field
     * is a known mapping field.
     *
     * @param allFieldsToType flattened field name to mapping type
     * @param searchResponse  response of the query built by {@link #buildQuery}
     * @return names of the fields to report
     */
    private Set<String> filterColumns(Map<String, String> allFieldsToType, SearchResponse searchResponse) {
        InternalSampler sampleAggregation = (InternalSampler) searchResponse.getAggregations().getAsMap().get(SAMPLE_AGG_NAME);
        InternalFilters notNullAggregation = (InternalFilters) sampleAggregation.getAggregations().getAsMap().get(NOT_NULL_KEYWORD);
        long totalDocCount = sampleAggregation.getDocCount();
        double minimumDocCount = HIGH_PRIORITY_COLUMN_THRESHOLD * (double) totalDocCount;

        Set<String> filteredNames = new HashSet<>();
        for (InternalFilters.InternalBucket bucket : notNullAggregation.getBuckets()) {
            // Bucket keys are "<field>_not_null" as produced by buildQuery; strip the suffix.
            String bucketKey = bucket.getKey();
            String targetField = bucketKey.substring(0, bucketKey.length() - NOT_NULL_SUFFIX.length());
            if ((double) bucket.getDocCount() > minimumDocCount && allFieldsToType.containsKey(targetField)) {
                filteredNames.add(targetField);
            }
        }
        return filteredNames;
    }
}

