Source code for pyreason.scripts.utils.fact_parser

import pyreason.scripts.numba_wrapper.numba_types.interval_type as interval
import re

# Predicate names: first character is an ASCII letter or underscore, the rest
# may be ASCII letters, digits, underscores, dots or hyphens.
# The pattern is anchored with \Z rather than $: $ also matches just before a
# trailing newline, which would let a name such as "pred\n" pass validation.
_PREDICATE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_.\-]*\Z')
# Component (node / edge endpoint) names: like predicates, but the first
# character may also be a digit and later characters may include '@'.
_COMPONENT_RE = re.compile(r'^[a-zA-Z0-9_][a-zA-Z0-9_.@\-]*\Z')


def _validate_predicate(name):
    """Validate a predicate name.

    A valid name is non-empty and conforms to ``_PREDICATE_RE`` in its
    entirety: it starts with an ASCII letter or underscore and continues with
    ASCII letters, digits, underscores, dots or hyphens.

    :param name: Predicate name to check.
    :raises ValueError: If ``name`` is empty, starts with a digit, or contains
        any other character the pattern does not allow.
    """
    if not name:
        raise ValueError("Predicate name cannot be empty")

    # fullmatch (not match): the pattern ends in "$", which with match() also
    # succeeds just before a trailing newline and would accept "pred\n".
    if _PREDICATE_RE.fullmatch(name):
        return

    # Give the most common mistake (leading digit) its own, clearer message.
    if name[0].isdigit():
        raise ValueError(f"Predicate name '{name}' cannot start with a digit. Must start with a letter or underscore")
    raise ValueError(f"Predicate name '{name}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_.\\-]*")


def _validate_component(name, context):
    """Validate a component (entity) name.

    A valid name is non-empty and conforms to ``_COMPONENT_RE`` in its
    entirety. Unlike predicate names, component names may start with a digit
    and may contain '@' after the first character.

    :param name: Component name to check.
    :param context: Human-readable label for the component (for example
        ``"Node component"``); used as the prefix of the error message.
    :raises ValueError: If ``name`` is empty or contains a character the
        pattern does not allow.
    """
    if not name:
        raise ValueError(f"{context} name cannot be empty")

    # fullmatch (not match): the pattern ends in "$", which with match() also
    # succeeds just before a trailing newline and would accept "node\n".
    if not _COMPONENT_RE.fullmatch(name):
        raise ValueError(f"{context} name '{name}' contains invalid characters. Must match [a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*")


# Input validation work was implemented with the help of Claude Sonnet 4.5.
def parse_fact(fact_text):
    """Parse a textual fact into predicate, component, bound and fact type.

    Space characters are removed before parsing. Accepted shapes::

        pred(node)               bound defaults to True  -> [1, 1]
        ~pred(node)              bound defaults to False -> [0, 0]
        pred(a,b)                edge fact (exactly two components)
        pred(node):True          explicit boolean bound (case-insensitive)
        pred(node):[0.2,0.8]     explicit interval bound
        ~pred(node):[0.2,0.8]    negated interval -> [1 - 0.8, 1 - 0.2]

    :param fact_text: The fact to parse.
    :return: A 4-tuple ``(pred, component, bound, fact_type)``. ``component``
        is a string for node facts and a 2-tuple of strings for edge facts,
        ``bound`` is the value returned by ``interval.closed`` and
        ``fact_type`` is ``'node'`` or ``'edge'``.
    :raises ValueError: If the text is empty, has more than one colon, uses
        double negation, has malformed parentheses, has an invalid predicate
        or component name, or has a malformed, non-numeric (including NaN),
        out-of-range or inverted interval.
    """
    # Validate input is not empty or whitespace only
    if not fact_text or not fact_text.strip():
        raise ValueError("Fact text cannot be empty or whitespace only")

    f = fact_text.replace(' ', '')

    # Check for multiple colons
    colon_count = f.count(':')
    if colon_count > 1:
        raise ValueError(f"Fact text contains multiple colons ({colon_count}), expected at most 1")

    # Check for double negation
    if f.startswith('~~'):
        raise ValueError("Double negation is not allowed")

    # Separate into predicate-component and bound.
    # If there is no bound it means the fact is true (or false when negated).
    negate_interval = False
    if ':' in f:
        # At most one colon is present (checked above), so this split is exact.
        pred_comp, _, bound = f.partition(':')

        # Negation with an explicit bound: flip booleans right away, defer
        # interval negation until the interval has been parsed and validated.
        if pred_comp.startswith('~'):
            pred_comp = pred_comp[1:]
            if bound.lower() == 'true':
                bound = 'False'
            elif bound.lower() == 'false':
                bound = 'True'
            else:
                negate_interval = True
    else:
        pred_comp = f
        if pred_comp.startswith('~'):
            bound = 'False'
            pred_comp = pred_comp[1:]
        else:
            bound = 'True'

    # Validate predicate-component is not empty
    if not pred_comp:
        raise ValueError("Predicate-component cannot be empty")

    # Validate parentheses exist and are properly formed
    if '(' not in pred_comp:
        raise ValueError("Missing opening parenthesis in fact")
    if ')' not in pred_comp:
        raise ValueError("Missing closing parenthesis in fact")

    # Check for nested or multiple parentheses
    open_count = pred_comp.count('(')
    close_count = pred_comp.count(')')
    if open_count != 1 or close_count != 1:
        raise ValueError(f"Invalid parentheses: found {open_count} '(' and {close_count} ')', expected exactly 1 of each")

    # Check parentheses are in correct order
    open_idx = pred_comp.find('(')
    close_idx = pred_comp.find(')')
    if open_idx >= close_idx:
        raise ValueError("Invalid parentheses order: '(' must come before ')'")

    # Check closing parenthesis is at the end
    if close_idx != len(pred_comp) - 1:
        raise ValueError("Closing parenthesis must be at the end of predicate-component")

    # Split the predicate and component
    pred = pred_comp[:open_idx]
    component = pred_comp[open_idx + 1:-1]

    # Validate predicate name
    _validate_predicate(pred)

    # Validate component is not empty
    if not component:
        raise ValueError("Component cannot be empty")

    # Check if it is a node or edge fact
    if ',' in component:
        fact_type = 'edge'
        components = component.split(',')

        # Validate exactly 2 components for edges
        if len(components) != 2:
            raise ValueError(f"Edge facts must have exactly 2 components, found {len(components)}")

        # Validate component names
        for i, comp in enumerate(components):
            _validate_component(comp, f"Edge component {i+1}")

        component = tuple(components)
    else:
        fact_type = 'node'
        _validate_component(component, "Node component")

    # Check if bound is a boolean or a list of floats
    if bound.lower() == 'true':
        bound = interval.closed(1, 1)
    elif bound.lower() == 'false':
        bound = interval.closed(0, 0)
    else:
        # Validate interval format
        if not bound.startswith('['):
            raise ValueError(f"Invalid bound format: expected '[' at start of interval, got '{bound[0] if bound else 'empty'}'")
        if not bound.endswith(']'):
            raise ValueError(f"Invalid bound format: expected ']' at end of interval, got '{bound[-1] if bound else 'empty'}'")

        # Extract values between brackets
        interval_content = bound[1:-1]
        if not interval_content:
            raise ValueError("Interval cannot be empty")

        # Parse float values
        parts = interval_content.split(',')
        if len(parts) != 2:
            raise ValueError(f"Interval must have exactly 2 values, found {len(parts)}")

        try:
            lower, upper = (float(b) for b in parts)
        except ValueError as e:
            raise ValueError(f"Invalid interval values: {e}") from e

        # Validate bounds are in valid range [0, 1].
        # Chained comparisons are used on purpose: float() accepts "nan", and
        # every ordering comparison with NaN is False, so a check written as
        # "lower < 0 or lower > 1" would silently let NaN through.
        if not 0 <= lower <= 1:
            raise ValueError(f"Interval lower bound {lower} is out of valid range [0, 1]")
        if not 0 <= upper <= 1:
            raise ValueError(f"Interval upper bound {upper} is out of valid range [0, 1]")

        # Validate lower <= upper
        if lower > upper:
            raise ValueError(f"Interval lower bound {lower} cannot be greater than upper bound {upper}")

        # We calculate ~[l,u] = [1-u, 1-l]
        # Round to eliminate floating point precision errors (e.g., 1 - 0.8 = 0.19999999...)
        if negate_interval:
            lower, upper = round(1 - upper, 10), round(1 - lower, 10)

        bound = interval.closed(lower, upper)

    return pred, component, bound, fact_type