modified parsing of mo files to be more structured and handle more cases (e.g., arrays) (99149a4d) · Commits · ExaDigiT / cooling-autocsm

AutoCSM/languages/modelica/create_model_nested.py

+10 −10

Original line number	Diff line number	Diff line
		@@ -81,18 +81,18 @@ def sources_line(data, project_path, base_path, model_class_path, key_path, unif
		# used here to force the IDE to create an FMU with causality=Input (i.e., time variant).
		# preference would be to have just a start or default value

		temp_names = nested_structure_utils.generate_key_strings(temp_struct, key_path, f'sources_{var[2]}')
		temp_names = nested_structure_utils.generate_key_strings(temp_struct, key_path, f'sources_{var["name"]}')
		temp_struct_filled = nested_structure_utils.replace_struct_values(temp_struct, temp_names)

		if uniform:
		temp_name = nested_structure_utils.get_first_value(temp_struct_filled)
		temp_s.append(f'each {var[2]}={temp_name}'.replace("'",""))
		global_variables_interface.append('Modelica.Blocks.Interfaces.RealInput ' + temp_name + f'(start={var[3]});')
		temp_s.append(f'each {var["name"]}={temp_name}'.replace("'",""))
		global_variables_interface.append('Modelica.Blocks.Interfaces.RealInput ' + temp_name + f'(start={var["value"]});')
		else:
		temp_s.append(f'{var[2]}={temp_struct_filled}'.replace('[', '{').replace(']', '}').replace("'",""))
		temp_s.append(f'{var["name"]}={temp_struct_filled}'.replace('[', '{').replace(']', '}').replace("'",""))

		for temp_name in temp_names:
		global_variables_interface.append('Modelica.Blocks.Interfaces.RealInput ' + temp_name + f'(start={var[3]});')
		global_variables_interface.append('Modelica.Blocks.Interfaces.RealInput ' + temp_name + f'(start={var["value"]});')

		temp = ','.join(temp_s)
		else:
		@@ -101,8 +101,8 @@ def sources_line(data, project_path, base_path, model_class_path, key_path, unif
		temp = ','.join([f'each {var[2]}={var[3]}' for var in inputs])
		else:
		for var in inputs:
		temp_struct_filled = nested_structure_utils.replace_struct_values(temp_struct, [var[3]]*nested_structure_utils.count_leaf_nodes(temp_struct))
		temp_s.append(f'{var[2]}={temp_struct_filled}'.replace('[', '{').replace(']', '}').replace("'",""))
		temp_struct_filled = nested_structure_utils.replace_struct_values(temp_struct, [var["value"]]*nested_structure_utils.count_leaf_nodes(temp_struct))
		temp_s.append(f'{var["name"]}={temp_struct_filled}'.replace('[', '{').replace(']', '}').replace("'",""))
		temp = ','.join(temp_s)
		line += f'sources({temp if temp else ""})\n'
		return line
		@@ -218,7 +218,7 @@ def main(json_file_path, project_path, base_path, parent_class, output_path, str
		# Verify required fields are present: Name, InstanceName (must be unique at that level), ModelClass (default v0), SourceClass (default NULL), Systems= [{}]
		default_structure_parameters = parse_files.get_variable_by_type(structure_path, 'parameter')
		data = copy.deepcopy(data_orig)
		nested_structure_utils.expand_data(data, {item[2]: item[3] for item in default_structure_parameters})
		nested_structure_utils.expand_data(data, {item["name"]: item["value"] for item in default_structure_parameters})


		lines = create_nested_lines(data, project_path, base_path, uniform, force_redeclare=force_redeclare)
		@@ -234,7 +234,7 @@ if __name__ == "__main__":
		output_path = '../../../temp/Simulator.mo'

		# Path to ExaDigiT AutoCSM based project
		project_path = '../../../examples/modelica/GenericCSM'
		project_path = '../../../examples/modelica/GenericDatacenter'

		# Path to top-level system model folder (dotted path) perhaps not a good one
		base_path = pathlib.Path(project_path).name #'GenericCSM'
		@@ -243,7 +243,7 @@ if __name__ == "__main__":
		parent_class = 'ExaDigiT_AutoCSM.BaseClasses.Tests.PartialTest'

		# Structure path
		structure_path ='../../../methods/Modelica/ExaDigiT_AutoCSM/Templates/Structure.mo'
		structure_path ='../../../methods/Modelica/TemplatesCSM/Templates/Structure.mo'

		# Create Modelica model file
		main(json_file_path, project_path, base_path, parent_class, output_path, structure_path, uniform=[False,False])
		No newline at end of file

AutoCSM/languages/modelica/parse_files.py

+222 −37

Original line number	Diff line number	Diff line
		@@ -10,52 +10,237 @@ Users may choose either license, at their discretion.
		import re
		import os

		def get_file_lines(file_path):
		# def get_file_lines(file_path):
		# """
		# Reads all lines from a file and returns them as a list.

		# Parameters:
		# file_path (str): Path to the file to read.

		# Returns:
		# list: List of lines from the file.
		# """
		# with open(file_path, 'r') as struct:
		# lines = struct.readlines()
		# return lines

		# def convert_lines_to_string(lines):
		# """
		# Converts a list of lines into a single string, joining them with spaces.

		# Parameters:
		# lines (list): List of lines (strings).

		# Returns:
		# str: A single string with all lines joined by spaces.
		# """
		# output = ' '.join(lines)
		# # output = output.replace('\n',' ')
		# return output

		# def extract_variable(input_string, dtype=None):
		# """
		# Extracts variable definitions from a given input string based on modelica type.

		# Parameters:
		# input_string (str): The input string to search for variables.
		# dtype (str or list): Type of variable to extract (e.g., 'parameter', 'input').
		# If None, defaults to 'parameter\|input\|output'.

		# Returns:
		# list: A list of tuples containing variable details (dtype, variable name, etc.).
		# """
		# if dtype is None:
		# dtype = 'parameter\|input\|output'
		# elif type(dtype) == list:
		# dtype = '\|'.join(dtype)
		# pattern = r'(?<!final\s)\b({})\s([\w\.]+)\s(\w+)\s=\s([\w\.]+)\s(?:"([^"])")?'.format(dtype)
		# variables = re.findall(pattern, input_string)

		# return variables

		def _remove_comments(content):
		"""
		Reads all lines from a file and returns them as a list.
		Remove Modelica comments while preserving string literals.

		Parameters:
		file_path (str): Path to the file to read.
		Args:
		content (str): The file content as a string.

		Returns:
		list: List of lines from the file.
		str: Content with comments removed.
		"""
		with open(file_path, 'r') as struct:
		lines = struct.readlines()
		return lines
		# Remove multi-line comments (/* ... */) across lines
		content = re.sub(r'/\.?\*/', '', content, flags=re.DOTALL)
		# Process each line to remove single-line comments (//), respecting strings
		lines = content.splitlines()
		for i in range(len(lines)):
		line = lines[i]
		in_string = False
		new_line = []
		for j, char in enumerate(line):
		if char == '"':
		in_string = not in_string
		if not in_string and line[j:j+2] == '//':
		break
		new_line.append(char)
		lines[i] = ''.join(new_line)
		return '\n'.join(lines)

		def convert_lines_to_string(lines):
		def _extract_variable_name_dims_mods(expression):
		"""
		Converts a list of lines into a single string, joining them with spaces.
		Find name, dimensions [], and modifiers () of variable declaration using the '=' as the divider.

		Parameters:
		lines (list): List of lines (strings).
		Args:
		expression (str): The string to search (e.g., 'hello[3](start=0) = 1.0 "cool"').

		Returns:
		str: A single string with all lines joined by spaces.
		name (str): Name of the variable (e.g., hello)
		brackets (str): Contents, if any, of brackets (e.g., 3 or None)
		parenthesis (str): Contents, if any, of parenthesis (e.g., start=0 or None)
		right_part (str): Portion of expression on right side of '=' (e.g., 1.0 "cool")
		"""
		output = ' '.join(lines)
		# output = output.replace('\n',' ')
		return output
		# Step 1: Find the first '=' that is NOT inside parentheses
		depth_paren = 0 # Track parentheses depth
		depth_bracket = 0 # Track brackets depth

		for i, char in enumerate(expression):
		if char == '(':
		depth_paren += 1
		elif char == ')':
		depth_paren -= 1
		elif char == '[':
		depth_bracket += 1
		elif char == ']':
		depth_bracket -= 1
		elif char == '=' and depth_paren == 0: # Found '=' outside parentheses
		left_part = expression[:i].strip()
		right_part = expression[i + 1:].strip()
		break
		else:
		raise ValueError("No valid '=' found outside parentheses.")

		def extract_variable(input_string, dtype=None):
		"""
		Extracts variable definitions from a given input string based on modelica type.
		# Step 2: Extract parts from the left side
		pattern = re.match(r'([^\[\(]+)(?:\[(.?)\])?(?:\((.?)\))?', left_part)

		Parameters:
		input_string (str): The input string to search for variables.
		dtype (str or list): Type of variable to extract (e.g., 'parameter', 'input').
		If None, defaults to 'parameter\|input\|output'.
		if pattern:
		name = pattern.group(1).strip() # Main variable name
		brackets = pattern.group(2).replace(' ', '') if pattern.group(2) else None # Content inside square brackets
		parenthesis = pattern.group(3) if pattern.group(3) else None # Content inside parentheses
		else:
		raise ValueError("Invalid left-hand side format.")

		return name, brackets, parenthesis, right_part


		def extract_variables(file_path, type_prefixes=None, types=None, exclude_prefixes=['final'], invert_type_prefixes=False, invert_types=False):
		"""
		Parse a Modelica file to extract variables declarations.

		Args:
		file_path (str): Path to the Modelica file.
		type_prefixes (str or list, optional): Type-prefixes to extract (e.g., 'parameter\|input\|output' or ['parameter', 'input']).
		Defaults to 'parameter\|input\|output' if None.
		types (list (str), optional): Types to extract (e.g., "Real, String, etc."). Defaults to all if None.
		exclude_prefixes (list (str), optional): Ignore variables with any of the specified prefixes (e.g., "final")
		invert_type_prefixes (bool, optional): Invert the type_prefixes to return all not matching type_prefixes. Only used if type_prefixes is not None.
		invert_types (bool, optional): Invert the invert_types to return all not matching invert_types. Only used if invert_types is not None.
		Returns:
		list: A list of tuples containing variable details (dtype, variable name, etc.).
		list: List of dictionaries containing variable details.
		"""
		if dtype is None:
		dtype = 'parameter\|input\|output'
		elif type(dtype) == list:
		dtype = '\|'.join(dtype)
		pattern = r'(?<!final\s)\b({})\s([\w\.]+)\s(\w+)\s=\s([\w\.]+)\s(?:"([^"])")?'.format(dtype)
		variables = re.findall(pattern, input_string)
		# Read the file content
		with open(file_path, 'r') as file:
		content = file.read()

		# Remove all comments from the content
		content = _remove_comments(content)

		# Set up the type prefix pattern for the regex
		if type_prefixes is None:
		type_prefixes = 'parameter\|input\|output'
		elif isinstance(type_prefixes, list):
		type_prefixes = '\|'.join(type_prefixes)

		# Define regex pattern to match variable declarations
		# Captures: (prefixes) (type_prefix) (type) (definition);
		pattern = r'((?:\b\w+\s+))\b({})\s+([A-Za-z_][\w.])\s+(.*?);'.format(type_prefixes)
		matches = re.finditer(pattern, content, re.DOTALL)

		variables = []
		for match in matches:
		prefixes = match.group(1).strip().split() # Extract and split prefixes to list (e.g., final)
		type_prefix = match.group(2) # e.g., 'parameter', 'input'
		type_ = match.group(3) # e.g., 'Real', 'Integer'
		definition = match.group(4).strip() # remaining portions of variable declartion (e.g., "NAME(...)=...")

		# Extract variable name, dimensions (if any), and modifiers (if any)
		name, dimensions, modifiers, definition = _extract_variable_name_dims_mods(definition)

		# Find then remove annotation if present
		annotation = None
		annotation_index = definition.find("annotation(")
		if annotation_index != -1:
		annotation = definition[annotation_index:]
		definition = definition[:annotation_index]

		# Extract comment if present. If type is String than set value.
		comment = None
		value = None
		comment_matches = re.findall(r'"(.*?)"', definition)
		if len(comment_matches) == 1 and type_ != 'String':
		comment = comment_matches[0]
		elif len(comment_matches) == 1 and type_ == 'String':
		# No comment
		value = comment_matches[0]
		elif len(comment_matches) == 2 and type_ == 'String':
		comment = comment_matches[1]
		value = comment_matches[0]
		elif len(comment_matches) == 0 and type_ == 'String':
		raise ValueError('No value found for variable of type String: {}'.format(definition))
		elif len(comment_matches) > 2:
		raise ValueError('Unrecongized variable definition: {}'.format(definition))

		# Remove comment and update remaining string
		if comment is not None:
		pattern = rf'"{re.escape(comment)}"'
		definition = re.sub(pattern, '', definition)

		# If value not set to remaining content (i.e., not a string)
		if value is None:
		value = definition.replace(' ','')

		# Exclusion logic
		if type_prefixes:
		if invert_type_prefixes:
		if type_prefix in type_prefixes:
		continue
		else:
		if type_prefix not in type_prefixes:
		continue

		if types:
		if invert_types:
		if type_ in types:
		continue
		else:
		if type_ not in types:
		continue

		if any(prefix in prefixes for prefix in exclude_prefixes):
		continue

		# Construct dictionary with variable details
		variable = {
		"name": name,
		"prefixes": prefixes if prefixes else None,
		"type_prefix": type_prefix,
		"type": type_,
		"dimensions": dimensions,
		"modifiers": modifiers,
		"value": value,
		"comment": comment,
		"annotation": annotation
		}
		variables.append(variable)

		return variables

		@@ -70,9 +255,9 @@ def get_variable_by_type(file_path, dtype='parameter'):
		Returns:
		list: A list of dictionaries describing the variables found.
		"""
		lines = get_file_lines(file_path)
		input_string = convert_lines_to_string(lines)
		parameters = extract_variable(input_string, dtype)
		# lines = get_file_lines(file_path)
		# input_string = convert_lines_to_string(lines)
		parameters = extract_variables(file_path, dtype)
		return parameters

		def extract_default_class_from_model(file_path, folder='Sources', instance='sources', ignore_prefix=False):
		@@ -175,7 +360,7 @@ def search_log_file_reverse(file_path, search_text):

		if __name__ == "__main__":

		file_path = '../../../methods/modelica/ExaDigiT_AutoCSM/Templates/Structure.mo'
		file_path = '../../../methods/modelica/TemplatesCSM/Templates/Structure.mo'
		parameters = get_variable_by_type(file_path)
		print(parameters)