Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion configuration/dqdl/DataQualityDefinitionLanguage.g4
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ sizeBasedCondition:
ruleType: IDENTIFIER;
analyzerType: IDENTIFIER;
parameter: QUOTED_STRING
| IDENTIFIER;
| IDENTIFIER
| variableDereference;
connectorWord: OF | AND;
parameterWithConnectorWord: connectorWord? parameter;
tagWithCondition: 'with' tagValues (stringBasedCondition | numberBasedCondition);
Expand Down
6 changes: 4 additions & 2 deletions configuration/rules/rules-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@
{
"type": "String",
"name": "TargetColumn",
"description": "Name of column to check uniqueness of"
"description": "Name of column(s) to check uniqueness of",
"is_var_arg": true
}
],
"return_type": "NUMBER",
Expand All @@ -139,7 +140,8 @@
{
"type": "String",
"name": "TargetColumn",
"description": "Name of column to check uniqueness of"
"description": "Name of column(s) to check uniqueness of",
"is_var_arg": true
}
],
"return_type": "BOOLEAN",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

package com.amazonaws.glue.ml.dataquality.dqdl.model;

import com.amazonaws.glue.ml.dataquality.dqdl.model.parameter.DQRuleParameterValue;
import lombok.AllArgsConstructor;
import lombok.Getter;

Expand Down
44 changes: 1 addition & 43 deletions src/com/amazonaws/glue/ml/dataquality/dqdl/model/DQRule.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import com.amazonaws.glue.ml.dataquality.dqdl.model.condition.Condition;
import com.amazonaws.glue.ml.dataquality.dqdl.model.condition.string.Tag;
import com.amazonaws.glue.ml.dataquality.dqdl.model.parameter.DQRuleParameterValue;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
Expand All @@ -20,13 +21,11 @@

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static com.amazonaws.glue.ml.dataquality.dqdl.model.condition.string.Tag.convertToStringMap;
import static com.amazonaws.glue.ml.dataquality.dqdl.parser.DQDLVariableResolver.resolveVariablesInCondition;
import static com.amazonaws.glue.ml.dataquality.dqdl.util.StringUtils.isBlank;

@AllArgsConstructor
Expand Down Expand Up @@ -138,47 +137,6 @@ public static DQRule createFromParameterValueMap(final DQRuleType ruleType,
);
}

/**
 * Creates a {@link DQRule} from a parameter-value map, resolving variables in its conditions.
 *
 * <p>A rule is always built first through {@code createFromParameterValueMap}. When
 * {@code variables} is {@code null} or empty, that rule is returned unchanged. Otherwise each
 * non-null condition is passed through {@code resolveVariablesInCondition} and a new rule is
 * constructed around the resolved conditions.
 *
 * @param ruleType           rule type supplying the rule name and the row-level exclusion flag
 * @param parameters         ordered parameter values of the rule
 * @param condition          rule condition, may be {@code null}
 * @param thresholdCondition threshold condition, may be {@code null}
 * @param whereClause        where clause passed through to the rule
 * @param tags               tags passed through to the rule
 * @param variables          variables available for resolution, may be {@code null} or empty
 * @return the rule, with its conditions resolved when variables were supplied
 */
public static DQRule createFromParameterValueMapWithVariables(final DQRuleType ruleType,
                                                              final LinkedHashMap<String, DQRuleParameterValue>
                                                                  parameters,
                                                              final Condition condition,
                                                              final Condition thresholdCondition,
                                                              final String whereClause,
                                                              final Map<String, Tag> tags,
                                                              final Map<String, DQVariable> variables) {
    // Built unconditionally, before looking at the variables, exactly as the fallback result.
    final DQRule baseRule = createFromParameterValueMap(
        ruleType, parameters, condition, thresholdCondition, whereClause, tags);

    final boolean nothingToResolve = variables == null || variables.isEmpty();
    if (nothingToResolve) {
        return baseRule;
    }

    // Passed to the resolver alongside the variables; not read again in this method.
    final Map<String, DQVariable> referencedVariables = new HashMap<>();

    Condition effectiveCondition = null;
    if (condition != null) {
        effectiveCondition = resolveVariablesInCondition(condition, variables, referencedVariables);
    }

    Condition effectiveThreshold = null;
    if (thresholdCondition != null) {
        effectiveThreshold = resolveVariablesInCondition(thresholdCondition, variables, referencedVariables);
    }

    return new DQRule(
        ruleType.getRuleTypeName(),
        DQRuleParameterValue.createParameterMap(parameters),
        parameters,
        effectiveCondition,
        effectiveThreshold,
        DQRuleLogicalOperator.AND,
        new ArrayList<>(),
        whereClause,
        ruleType.isExcludedAtRowLevelInCompositeRules(),
        tags
    );
}

/**
 * Builds a rule from this rule's builder with the nested rules set to the given list.
 *
 * @param nestedRules nested rules to place on the returned rule
 * @return the rule produced by the builder
 */
public DQRule withNestedRules(final List<DQRule> nestedRules) {
    final DQRule withNested = toBuilder()
        .nestedRules(nestedRules)
        .build();
    return withNested;
}
Expand Down

This file was deleted.

20 changes: 14 additions & 6 deletions src/com/amazonaws/glue/ml/dataquality/dqdl/model/DQRuleType.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

package com.amazonaws.glue.ml.dataquality.dqdl.model;

import com.amazonaws.glue.ml.dataquality.dqdl.model.parameter.DQRuleParameterValue;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.AllArgsConstructor;
Expand Down Expand Up @@ -99,7 +100,8 @@ public Optional<String> verifyParameters(List<DQRuleParameter> expectedParameter
}

public LinkedHashMap<String, DQRuleParameterValue> createParameterMap(List<DQRuleParameter> dqRuleTypeParameters,
List<DQRuleParameterValue> actualParameters) {
List<DQRuleParameterValue> actualParameters,
String typeName) {
LinkedHashMap<String, DQRuleParameterValue> parameterMap = new LinkedHashMap<>();

for (int i = 0; i < dqRuleTypeParameters.size(); i++) {
Expand All @@ -112,11 +114,17 @@ public LinkedHashMap<String, DQRuleParameterValue> createParameterMap(List<DQRul
counter = dqRuleTypeParameters.size() - 1;
}

for (int j = counter; j < actualParameters.size(); j++) {
String newDqRuleTypeParameterName = dqRuleTypeParameterName + (j + 1);
DQRuleParameterValue actualParameterName = actualParameters.get(j);

parameterMap.put(newDqRuleTypeParameterName, actualParameterName);
// Special handling for Uniqueness and IsUnique with single column
if ((typeName.equals("Uniqueness") || typeName.equals("IsUnique"))
&& actualParameters.size() == 1) {
parameterMap.put(dqRuleTypeParameterName, actualParameters.get(0));
} else {
// Original behavior for all other cases
for (int j = counter; j < actualParameters.size(); j++) {
String newDqRuleTypeParameterName = dqRuleTypeParameterName + (j + 1);
DQRuleParameterValue actualParameterName = actualParameters.get(j);
parameterMap.put(newDqRuleTypeParameterName, actualParameterName);
}
}
} else {
parameterMap.put(dqRuleTypeParameterName, actualParameters.get(i));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public String getFormattedExpression() {

// Returns the current date-time in UTC.
// NOTE(review): two return statements appear here; this looks like the before/after lines of a
// diff rendered without +/- markers, so only one of them can be live code. The first returns the
// current time unmodified; the second additionally sets the second and nanosecond fields to zero.
// Confirm which one is intended.
@Override
public LocalDateTime getEvaluatedExpression() {
return LocalDateTime.now(ZoneOffset.UTC);
return LocalDateTime.now(ZoneOffset.UTC).withSecond(0).withNano(0);
}
}

Expand Down Expand Up @@ -103,13 +103,13 @@ public LocalDateTime getEvaluatedExpression() {
return evaluateMinutes(
operator,
duration.getAmount() * 60,
LocalDateTime.now(ZoneOffset.UTC).withMinute(0)
LocalDateTime.now(ZoneOffset.UTC)
);
case DAYS:
return evaluateMinutes(
operator,
duration.getAmount() * 60 * 24,
LocalDateTime.now(ZoneOffset.UTC).withMinute(0)
LocalDateTime.now(ZoneOffset.UTC)
);
default:
throw new RuntimeException("Unsupported duration unit: " + duration.getUnit());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import com.amazonaws.glue.ml.dataquality.dqdl.model.DQRule;
import com.amazonaws.glue.ml.dataquality.dqdl.model.condition.Condition;
import com.amazonaws.glue.ml.dataquality.dqdl.util.StringUtils;
import static java.lang.Math.abs;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
Expand Down Expand Up @@ -46,78 +45,77 @@ public Boolean evaluate(Double metric, DQRule dqRule, OperandEvaluator evaluator
List<Double> operandsAsDouble = operands.stream()
.map(operand -> evaluator.evaluate(dqRule, operand)).collect(Collectors.toList());


log.info(String.format("Evaluating condition for rule: %s", dqRule));
List<String> formatOps = operandsAsDouble.stream().map(OP_FORMAT::format).collect(Collectors.toList());
String formatMetric = OP_FORMAT.format(metric);

switch (operator) {
case BETWEEN:
if (operands.size() != 2) return false;
else {
boolean result = metric > operandsAsDouble.get(0) && metric < operandsAsDouble.get(1);
log.info("{} between {} and {}? {}", formatMetric, formatOps.get(0), formatOps.get(1), result);
log.info("{} between {} and {}? {}",
metric, operandsAsDouble.get(0), operandsAsDouble.get(1), result);
return result;
}
case NOT_BETWEEN:
if (operands.size() != 2) return false;
else {
boolean result = metric <= operandsAsDouble.get(0) || metric >= operandsAsDouble.get(1);
log.info("{} not between {} and {}? {}", formatMetric, formatOps.get(0), formatOps.get(1), result);
log.info("{} not between {} and {}? {}",
metric, operandsAsDouble.get(0), operandsAsDouble.get(1), result);
return result;
}
case GREATER_THAN_EQUAL_TO:
if (operands.size() != 1) return false;
else {
boolean result = metric >= operandsAsDouble.get(0);
log.info("{} >= {}? {}", formatMetric, formatOps.get(0), result);
log.info("{} >= {}? {}", metric, operandsAsDouble.get(0), result);
return result;
}
case GREATER_THAN:
if (operands.size() != 1) return false;
else {
boolean result = metric > operandsAsDouble.get(0);
log.info("{} > {}? {}", formatMetric, formatOps.get(0), result);
log.info("{} > {}? {}", metric, operandsAsDouble.get(0), result);
return result;
}
case LESS_THAN_EQUAL_TO:
if (operands.size() != 1) return false;
else {
boolean result = metric <= operandsAsDouble.get(0);
log.info("{} <= {}? {}", formatMetric, formatOps.get(0), result);
log.info("{} <= {}? {}", metric, operandsAsDouble.get(0), result);
return result;
}
case LESS_THAN:
if (operands.size() != 1) return false;
else {
boolean result = metric < operandsAsDouble.get(0);
log.info("{} < {}? {}", formatMetric, formatOps.get(0), result);
log.info("{} < {}? {}", metric, operandsAsDouble.get(0), result);
return result;
}
case EQUALS:
if (operands.size() != 1) return false;
else {
boolean result = isOperandEqualToMetric(metric, operandsAsDouble.get(0));
log.info("{} == {}? {}", formatMetric, formatOps.get(0), result);
log.info("{} == {}? {}", metric, operandsAsDouble.get(0), result);
return result;
}
case NOT_EQUALS:
if (operands.size() != 1) return false;
else {
boolean result = !isOperandEqualToMetric(metric, operandsAsDouble.get(0));
log.info("{} != {}? {}", formatMetric, formatOps.get(0), result);
log.info("{} != {}? {}", metric, operandsAsDouble.get(0), result);
return result;
}
case IN: {
boolean result = operandsAsDouble.stream().anyMatch(operand ->
isOperandEqualToMetric(metric, operand));
log.info("{} in {}? {}", formatMetric, formatOps, result);
log.info("{} in {}? {}", metric, operandsAsDouble, result);
return result;
}
case NOT_IN: {
boolean result = !operandsAsDouble.stream().anyMatch(operand ->
isOperandEqualToMetric(metric, operand));
log.info("{} not in {}? {}", formatMetric, formatOps, result);
log.info("{} not in {}? {}", metric, operandsAsDouble, result);
return result;
}
default:
Expand Down Expand Up @@ -196,6 +194,6 @@ private String getSortedFormattedOperands() {
}

// Reports whether the metric and the operand are considered equal.
// NOTE(review): two return statements appear here; this looks like the before/after lines of a
// diff rendered without +/- markers, so only one of them can be live code. The first accepts an
// absolute difference of up to 0.00001; the second uses Double.equals, which is an exact
// comparison with no tolerance (and distinguishes 0.0 from -0.0). Confirm which one is intended.
protected boolean isOperandEqualToMetric(Double metric, Double operand) {
return abs(metric - operand) <= 0.00001;
return metric.equals(operand);
}
}
Loading