Open
Description
Search before asking
- I had searched in the issues and found no similar issues.
Version
Git : git://vm-241@39f9074
Version : doris-3.0.4-rc02
BuildInfo : vm-241
BuildTime : Fri, 21 Feb 2025 16:28:34 1Z
What's Wrong?
-
执行下面的超长查询语句包含2个条件执行会卡死,必须kill才行,否则影响整个集群的其他查询。
查询1的条件: WHERE (array_contains(cert_subject_root_domains, 'stackoverflow.com')) OR (array_contains(cert_subject_root_domains, 'cypress.com')) -
只有一个查询条件的话1s就能返回,下面两个查询都能1s返回:
查询2的条件: WHERE (array_contains(cert_subject_root_domains, 'stackoverflow.com'))
查询3的条件: WHERE (array_contains(cert_subject_root_domains, 'cypress.com')) -
去掉查询1后面的cte查询部分, 只有base_data的查询2s就能返回,count或获取limit 10。
-
使用explain分析, 查询1和查询2只有下面的PREDICATES部分有区别
0:VOlapScanNode(4272) |
TABLE: fofa_data.fofa_data(fofa_data), PREAGGREGATION: ON |
PREDICATES: ((array_contains(cert_subject_root_domains[#52], 'cypress.com') AND (lastupdatetime[#2] > '2024-04-22 11:11:43')) AND (((is_fraud[#98] = FALSE) OR is_fraud[#98] IS NULL) AND (__DORIS_DELETE_SIGN__[#105] = 0))) |
partitions=1/1 (fofa_data) |
tablets=1000/1000, tabletList=1742801763726,1742801763729,1742801763732 ... |
cardinality=5832035224, avgRowSize=1287.6332, numNodes=2 |
pushAggOp=NONE |
final projections: asn_no[#66], asn_org[#67], cert_is_expired[#45], cert_is_match[#44], cert_is_valid[#43], cert_iss_sub_eq[#46], cert_issuer_cn_one[#53], cert_issuer_orgs[#55], cert_not_after[#57], cert_not_before[#58], cert_sn[#47], cert_subject_cn|
final project output tuple id: 1
- 执行profile对比两个查询, 查询1的OLAP_SCAN_OPERATOR占用时间特别长,profile结果文件:
WITH base_data AS (
SELECT
asn_no,
asn_org,
cert_is_expired,
cert_is_match,
cert_is_valid,
cert_iss_sub_eq,
cert_issuer_cn_one,
cert_issuer_orgs,
cert_not_after,
cert_not_before,
cert_sn,
cert_subject_cn,
cert_subject_org_one,
cert_subject_root_domains,
geoip_country_name,
ip,
lastupdatetime,
port,
protocol
FROM fofa_data
WHERE (array_contains(cert_subject_root_domains, 'stackoverflow.com')) OR (array_contains(cert_subject_root_domains, 'cypress.com')) AND lastupdatetime > DATE_SUB(NOW(), INTERVAL 1 YEAR) AND (is_fraud = false OR is_fraud IS NULL)
),
total_count_cte AS (
SELECT
COUNT(*) AS total_count
FROM base_data
),
unique_ip_count_cte AS (
SELECT
COUNT(DISTINCT ip) AS unique_ip_count
FROM base_data
WHERE ip IS NOT NULL
),
top_geoip_country_name AS (
SELECT
geoip_country_name,
COUNT(*) AS total_count
FROM base_data t1
WHERE geoip_country_name IS NOT NULL
GROUP BY geoip_country_name
ORDER BY total_count DESC
LIMIT 10
),
top_port AS (
SELECT
port,
COUNT(*) AS total_count
FROM base_data t1
WHERE port IS NOT NULL
GROUP BY port
ORDER BY total_count DESC
LIMIT 10
),
top_protocol AS (
SELECT
protocol,
COUNT(*) AS total_count
FROM base_data t1
WHERE protocol IS NOT NULL
GROUP BY protocol
ORDER BY total_count DESC
LIMIT 10
),
top_cert_subject_org_one AS (
SELECT
cert_subject_org_one,
COUNT(*) AS total_count
FROM base_data t1
WHERE cert_subject_org_one IS NOT NULL
GROUP BY cert_subject_org_one
ORDER BY total_count DESC
LIMIT 10
),
top_cert_sn AS (
SELECT
cert_sn,
COUNT(*) AS total_count,
any(cert_is_valid) as cert_is_valid,
any(cert_is_match) as cert_is_match,
any(cert_iss_sub_eq) as cert_iss_sub_eq,
any(cert_is_expired) as cert_is_expired,
any(CAST(CAST(cert_subject_root_domains AS JSON) AS STRING)) as cert_subject_root_domains,
any(cert_subject_org_one) as cert_subject_org_one,
any(cert_subject_cn) as cert_subject_cn,
any(CAST(CAST(cert_issuer_orgs AS JSON) AS STRING)) as cert_issuer_orgs,
any(cert_issuer_cn_one) as cert_issuer_cn_one,
any(cert_not_before) as cert_not_before,
any(cert_not_after) as cert_not_after
FROM base_data t1
WHERE cert_sn IS NOT NULL
GROUP BY cert_sn
ORDER BY total_count DESC
LIMIT 10
),
top_asn_no AS (
SELECT
asn_no,
COUNT(*) AS total_count,
any(asn_org) as asn_org
FROM base_data t1
WHERE asn_no IS NOT NULL
GROUP BY asn_no
ORDER BY total_count DESC
LIMIT 10
),
unique_geoip_country_name AS (
SELECT
COUNT(DISTINCT geoip_country_name) AS unique_count
FROM base_data
WHERE geoip_country_name IS NOT NULL
),
unique_port AS (
SELECT
COUNT(DISTINCT port) AS unique_count
FROM base_data
WHERE port IS NOT NULL
),
unique_protocol AS (
SELECT
COUNT(DISTINCT protocol) AS unique_count
FROM base_data
WHERE protocol IS NOT NULL
),
unique_cert_subject_org_one AS (
SELECT
COUNT(DISTINCT cert_subject_org_one) AS unique_count
FROM base_data
WHERE cert_subject_org_one IS NOT NULL
),
unique_cert_sn AS (
SELECT
COUNT(DISTINCT cert_sn) AS unique_count
FROM base_data
WHERE cert_sn IS NOT NULL
),
unique_asn_no AS (
SELECT
COUNT(DISTINCT asn_no) AS unique_count
FROM base_data
WHERE asn_no IS NOT NULL
)
SELECT
'country' AS field_name,
geoip_country_name AS field_value,
total_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM top_geoip_country_name
UNION ALL
SELECT
'uniq' AS field_name,
'country' AS field_value,
unique_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_geoip_country_name
UNION ALL
SELECT
'port' AS field_name,
port AS field_value,
total_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM top_port
UNION ALL
SELECT
'uniq' AS field_name,
'port' AS field_value,
unique_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_port
UNION ALL
SELECT
'protocol' AS field_name,
protocol AS field_value,
total_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM top_protocol
UNION ALL
SELECT
'uniq' AS field_name,
'protocol' AS field_value,
unique_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_protocol
UNION ALL
SELECT
'cert.subject.org' AS field_name,
cert_subject_org_one AS field_value,
total_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM top_cert_subject_org_one
UNION ALL
SELECT
'uniq' AS field_name,
'cert.subject.org' AS field_value,
unique_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_cert_subject_org_one
UNION ALL
SELECT
'cert.sn' AS field_name,
cert_sn AS field_value,
total_count AS count,
JSON_OBJECT(
'cert.sn', cert_sn,
'cert.is_expired', cert_is_expired,
'cert.is_match', cert_is_match,
'cert.is_valid', cert_is_valid,
'cert.is_equal', cert_iss_sub_eq,
'cert.issuer.cn', cert_issuer_cn_one,
'cert.issuer.org', CAST(cert_issuer_orgs AS JSONB),
'cert.not_after', cert_not_after,
'cert.not_before', cert_not_before,
'cert.subject.cn', cert_subject_cn,
'cert.subject.org', cert_subject_org_one,
'cert.domain', CAST(cert_subject_root_domains AS JSONB)
) AS detail,
NULL AS uniq_json
FROM top_cert_sn
UNION ALL
SELECT
'uniq' AS field_name,
'cert.sn' AS field_value,
unique_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_cert_sn
UNION ALL
SELECT
'asn' AS field_name,
asn_no AS field_value,
total_count AS count,
JSON_OBJECT(
'asn', asn_no,
'org', asn_org
) AS detail,
NULL AS uniq_json
FROM top_asn_no
UNION ALL
SELECT
'uniq' AS field_name,
'asn' AS field_value,
unique_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_asn_no
UNION ALL
SELECT
'total' AS field_name,
'count' AS field_value,
total_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM total_count_cte
UNION ALL
SELECT
'uniq' AS field_name,
'ip' AS field_value,
unique_ip_count AS count,
NULL AS detail,
NULL AS uniq_json
FROM unique_ip_count_cte
What You Expected?
上面超长查询修改条件的查询时间应该相差不大,而不是单独查询1s,加一个OR条件直接导致执行时间超长,卡死集群。
How to Reproduce?
No response
Anything Else?
No response
Are you willing to submit PR?
- Yes I am willing to submit a PR!
Code of Conduct
- I agree to follow this project's Code of Conduct
Metadata
Metadata
Assignees
Labels
No labels