import pyreason.scripts.numba_wrapper.numba_types.interval_type as interval
import re
# Predicate names: first character is a letter or underscore; the remaining
# characters may be letters, digits, underscore, dot or hyphen.
_PREDICATE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_.\-]*$')
# Component (entity) names: first character is a letter, digit or underscore;
# the remaining characters may additionally include dot, '@' and hyphen.
_COMPONENT_RE = re.compile(r'^[a-zA-Z0-9_][a-zA-Z0-9_.@\-]*$')


def _validate_predicate(name):
    """Validate that a predicate name starts with a letter/underscore.

    Args:
        name: The candidate predicate name.

    Raises:
        ValueError: If ``name`` is empty, starts with a digit, or does not
            match ``_PREDICATE_RE`` in its entirety.
    """
    if not name:
        raise ValueError("Predicate name cannot be empty")
    # fullmatch (not match): outside MULTILINE mode '$' also matches just
    # before a trailing newline, so match() would accept e.g. "pred\n".
    if not _PREDICATE_RE.fullmatch(name):
        if name[0].isdigit():
            raise ValueError(f"Predicate name '{name}' cannot start with a digit. Must start with a letter or underscore")
        else:
            raise ValueError(f"Predicate name '{name}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_.\\-]*")


def _validate_component(name, context):
    """Validate that a component (entity) name contains only valid characters.

    Unlike predicate names, a component name may start with a digit.

    Args:
        name: The candidate component name.
        context: Human-readable label used as the prefix of the error
            message (e.g. ``"Node component"``).

    Raises:
        ValueError: If ``name`` is empty or does not match ``_COMPONENT_RE``
            in its entirety.
    """
    if not name:
        raise ValueError(f"{context} name cannot be empty")
    # fullmatch (not match) so that a trailing newline is rejected as well.
    if not _COMPONENT_RE.fullmatch(name):
        raise ValueError(f"{context} name '{name}' contains invalid characters. Must match [a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*")
# Input validation work was implemented with the help of Claude Sonnet 4.5.
def parse_fact(fact_text):
    """Parse a textual fact into predicate, component, bound and fact type.

    Accepted forms (space characters are ignored)::

        pred(node)              bound is [1, 1]
        ~pred(node)             bound is [0, 0]
        pred(node):True         bound is [1, 1]  ('true'/'false' are
        pred(node):False        bound is [0, 0]   case-insensitive)
        pred(node):[l,u]        bound is [l, u], with 0 <= l <= u <= 1
        ~pred(node):[l,u]       bound is [1-u, 1-l]
        pred(a,b)...            edge fact; same bound rules as above

    A leading ``~`` combined with an explicit ``True``/``False`` bound flips
    the boolean.

    Args:
        fact_text: The fact string to parse.

    Returns:
        A 4-tuple ``(pred, component, bound, fact_type)`` where ``pred`` is
        the predicate name, ``component`` is the node name (``str``) or a
        2-tuple of node names for an edge, ``bound`` is the object returned
        by ``interval.closed(lower, upper)``, and ``fact_type`` is
        ``'node'`` or ``'edge'``.

    Raises:
        ValueError: If the text is empty, has more than one colon, uses
            double negation, has malformed parentheses, has an invalid
            predicate/component name, or has a malformed or out-of-range
            bound (including NaN interval values).
    """
    # Validate input is not empty or whitespace only
    if not fact_text or not fact_text.strip():
        raise ValueError("Fact text cannot be empty or whitespace only")

    # Only the space character is removed; tabs and newlines are left in place.
    f = fact_text.replace(' ', '')

    # Check for multiple colons
    colon_count = f.count(':')
    if colon_count > 1:
        raise ValueError(f"Fact text contains multiple colons ({colon_count}), expected at most 1")

    # Check for double negation
    if f.startswith('~~'):
        raise ValueError("Double negation is not allowed")

    # Separate into predicate-component and bound. At most one colon exists
    # at this point, so partition yields exactly the two halves (or an empty
    # separator when no bound was given).
    pred_comp, colon, bound = f.partition(':')
    negated = pred_comp.startswith('~')
    if negated:
        pred_comp = pred_comp[1:]

    negate_interval = False
    if not colon:
        # No explicit bound: the fact is true, or false when negated.
        bound = 'False' if negated else 'True'
    elif negated:
        # Negation with an explicit bound: flip booleans now, and defer
        # interval negation until the interval has been parsed and validated.
        bound_lower = bound.lower()
        if bound_lower == 'true':
            bound = 'False'
        elif bound_lower == 'false':
            bound = 'True'
        else:
            negate_interval = True

    # Validate predicate-component is not empty
    if not pred_comp:
        raise ValueError("Predicate-component cannot be empty")

    # Validate parentheses exist and are properly formed
    if '(' not in pred_comp:
        raise ValueError("Missing opening parenthesis in fact")
    if ')' not in pred_comp:
        raise ValueError("Missing closing parenthesis in fact")

    # Check for nested or multiple parentheses
    open_count = pred_comp.count('(')
    close_count = pred_comp.count(')')
    if open_count != 1 or close_count != 1:
        raise ValueError(f"Invalid parentheses: found {open_count} '(' and {close_count} ')', expected exactly 1 of each")

    # Check parentheses are in correct order
    open_idx = pred_comp.find('(')
    close_idx = pred_comp.find(')')
    if open_idx >= close_idx:
        raise ValueError("Invalid parentheses order: '(' must come before ')'")

    # Check closing parenthesis is at the end
    if close_idx != len(pred_comp) - 1:
        raise ValueError("Closing parenthesis must be at the end of predicate-component")

    # Split the predicate and component
    pred = pred_comp[:open_idx]
    component = pred_comp[open_idx + 1:-1]

    # Validate predicate name
    _validate_predicate(pred)

    # Validate component is not empty
    if not component:
        raise ValueError("Component cannot be empty")

    # Check if it is a node or edge fact
    if ',' in component:
        fact_type = 'edge'
        components = component.split(',')
        # Validate exactly 2 components for edges
        if len(components) != 2:
            raise ValueError(f"Edge facts must have exactly 2 components, found {len(components)}")
        # Validate component names
        for i, comp in enumerate(components):
            _validate_component(comp, f"Edge component {i+1}")
        component = tuple(components)
    else:
        fact_type = 'node'
        _validate_component(component, "Node component")

    # Check if bound is a boolean or a list of floats
    bound_lower = bound.lower()
    if bound_lower == 'true':
        bound = interval.closed(1, 1)
    elif bound_lower == 'false':
        bound = interval.closed(0, 0)
    else:
        # Validate interval format
        if not bound.startswith('['):
            raise ValueError(f"Invalid bound format: expected '[' at start of interval, got '{bound[0] if bound else 'empty'}'")
        if not bound.endswith(']'):
            raise ValueError(f"Invalid bound format: expected ']' at end of interval, got '{bound[-1] if bound else 'empty'}'")

        # Extract values between brackets
        interval_content = bound[1:-1]
        if not interval_content:
            raise ValueError("Interval cannot be empty")

        # Parse float values
        parts = interval_content.split(',')
        if len(parts) != 2:
            raise ValueError(f"Interval must have exactly 2 values, found {len(parts)}")
        try:
            bound_values = [float(b) for b in parts]
        except ValueError as e:
            raise ValueError(f"Invalid interval values: {e}") from e
        lower, upper = bound_values

        # Validate bounds are in valid range [0, 1]. The chained comparison
        # is deliberately written in the positive form: every comparison with
        # NaN is False, so `x < 0 or x > 1` would let float('nan') through.
        if not 0 <= lower <= 1:
            raise ValueError(f"Interval lower bound {lower} is out of valid range [0, 1]")
        if not 0 <= upper <= 1:
            raise ValueError(f"Interval upper bound {upper} is out of valid range [0, 1]")

        # Validate lower <= upper
        if lower > upper:
            raise ValueError(f"Interval lower bound {lower} cannot be greater than upper bound {upper}")

        # We calculate ~[l,u] = [1-u, 1-l]
        # Round to eliminate floating point precision errors (e.g., 1 - 0.8 = 0.19999999...)
        if negate_interval:
            lower, upper = round(1 - upper, 10), round(1 - lower, 10)

        bound = interval.closed(lower, upper)

    return pred, component, bound, fact_type