Source code for noctis.data_architecture.datacontainer

import copy
from typing import Optional

from pydantic import BaseModel, Field

from noctis import settings
from noctis.data_architecture.datamodel import GraphRecord
from noctis.data_transformation.postprocessing.chemdata_generators import (
    ChemDataGeneratorFactory,
)


class DataContainer(BaseModel):
    """
    A container for managing and transforming chemical equation data.

    DataContainer is a collection of GraphRecord objects with methods for
    managing records and transforming data into various formats.

    Attributes:
        records (list[GraphRecord]): List of GraphRecord objects.
        ce_label (str): Label for chemical equations
            (default: settings.nodes.node_chemequation).

    Methods:
        set_ce_label(ce_label): Replace the chemical equation label.
        add_record(record): Add a GraphRecord to the container.
        get_record(record_key): Retrieve a specific GraphRecord.
        get_records(record_keys): Retrieve multiple GraphRecords.
        get_subcontainer_with_records(record_keys): Create a new DataContainer
            with specified records.
        transform_to(format_type, with_record_id, ce_label): Transform data to
            specified format.
        info(): Print a summary of the class and the registered formats.

    Note:
        DataContainer objects can be compared for equality using the ==
        operator. Only ``records`` take part in the comparison; ``ce_label``
        is ignored.
    """

    records: list[GraphRecord] = Field(default_factory=list)
    ce_label: str = Field(default=settings.nodes.node_chemequation)

    def __eq__(self, other):
        """Return True when ``other`` is a DataContainer with equal records."""
        if isinstance(other, DataContainer):
            return self.records == other.records
        return False

    def set_ce_label(self, ce_label: str) -> None:
        """Replace the chemical equation label stored on this container."""
        self.ce_label = ce_label

    def add_record(self, record: GraphRecord) -> None:
        """Append a GraphRecord to the end of the container."""
        self.records.append(record)

    def get_record(self, record_key: int) -> GraphRecord:
        """
        Return the record stored at position ``record_key``.

        Raises:
            IndexError: If ``record_key`` is outside the list of records.
        """
        return self.records[record_key]

    def get_records(self, record_keys: list[int]) -> list[GraphRecord]:
        """
        Return the records stored at the given positions, in the given order.

        Raises:
            IndexError: If any key is outside the list of records.
        """
        return [self.records[key] for key in record_keys]

    def get_subcontainer_with_records(self, record_keys: list[int]) -> "DataContainer":
        """
        Build a new DataContainer holding deep copies of the selected records.

        The new container keeps this container's ``ce_label``. Keys follow
        list-index semantics, so in-range negative keys count from the end.

        Args:
            record_keys: Positions of the records to copy.

        Returns:
            A new DataContainer with copies of the requested records, in the
            order the keys were given.

        Raises:
            KeyError: If one or more keys do not address an existing record.
        """
        # Carry the label over so the subcontainer transforms like its parent.
        subcontainer = DataContainer(ce_label=self.ce_label)
        num_records = len(self.records)
        missing_keys: set[int] = set()
        for key in record_keys:
            # Check both bounds: a key below -num_records would otherwise
            # surface as an IndexError instead of the documented KeyError.
            if -num_records <= key < num_records:
                subcontainer.add_record(copy.deepcopy(self.records[key]))
            else:
                missing_keys.add(key)
        if missing_keys:
            # Sort so the error message is stable across runs.
            missing_keys_str = ", ".join(map(str, sorted(missing_keys)))
            raise KeyError(
                f"Record keys {missing_keys_str} not found in DataContainer."
            )
        return subcontainer

    def __str__(self) -> str:
        """Return a short summary: record count, label and a record preview."""
        num_records = len(self.records)
        # Preview at most the first 10 records.
        records_preview = ", ".join(str(record) for record in self.records[:10])
        return (
            f"DataContainer with {num_records} records\n"
            f"Chemical Equation Label: {self.ce_label}\n"
            f"Records Preview: [{records_preview}]"
        )

    def transform_to(
        self,
        format_type: str,
        with_record_id: Optional[bool] = True,
        ce_label: Optional[str] = None,
    ):
        """
        Transform the stored records with the generator for ``format_type``.

        Args:
            format_type: Name of the output format; resolved through
                ChemDataGeneratorFactory.get_generator.
            with_record_id: Passed through to the generator unchanged.
            ce_label: Chemical equation label to use; falls back to
                ``self.ce_label`` when None.

        Returns:
            Whatever the selected generator's ``generate`` method returns.
        """
        if ce_label is None:
            ce_label = self.ce_label
        generator = ChemDataGeneratorFactory().get_generator(format_type)
        return generator.generate(self.records, with_record_id, ce_label)

    @classmethod
    def info(cls) -> None:
        """Print detailed information about registered generators, reaction formats, and usage."""
        # Get available format types and reaction string formats from ChemDataGeneratorFactory
        available_format_types = ChemDataGeneratorFactory.get_available_formats()
        available_reaction_formats = (
            ChemDataGeneratorFactory.get_available_reaction_formats()
        )

        info_lines = [
            "DataContainer Class Information:",
            "================================",
            "Attributes:",
            "-----------",
            "records (list[GraphRecord]): List of GraphRecord objects.",
            "ce_label (str): Label for chemical equations (default: settings.nodes.node_chemequation).",
            "",
            "Methods:",
            "--------",
            "add_record(record): Add a GraphRecord to the container.",
            "get_record(record_key): Retrieve a specific GraphRecord.",
            "get_records(record_keys): Retrieve multiple GraphRecords.",
            "get_subcontainer_with_records(record_keys): Create a new DataContainer with specified records.",
            "transform_to(format_type, with_record_id, ce_label): Transform data to specified format.",
            "",
            "Available Format Types for transform_to:",
            "----------------------------------------",
            ", ".join(available_format_types),
            "",
            "Available Reaction String Formats:",
            "-----------------------------------",
            ", ".join(available_reaction_formats),
            "",
            "Usage Example:",
            "--------------",
            "data_container = DataContainer()\n"
            "data_container.add_record(GraphRecord(...))\n"
            "record = data_container.get_record(0)\n"
            "subcontainer = data_container.get_subcontainer_with_records([0, 1, 2])\n"
            "dataframe_nodes, dataframe_relationships = data_container.transform_to(format_type='pandas')\n",
        ]

        # The summary is written to stdout; nothing is returned.
        print("\n".join(info_lines))