Source code for noctis.repository.neo4j.neo4j_functions

import pandas as pd
import warnings
import ast

from noctis.data_architecture.datamodel import GraphRecord
from noctis.data_architecture.datacontainer import DataContainer
from pathlib import Path
from typing import Union, List


def _convert_datacontainer_to_query(data_container: DataContainer) -> list[str]:
    """
    Flattens every record of a DataContainer into a single list of queries.

    Parameters:
        data_container (DataContainer): The container whose ``records`` are converted.

    Returns:
        list[str]: The queries of all records, in record order.
    """
    return [
        query
        for record in data_container.records
        for query in _convert_record_to_query_neo4j(record)
    ]
def _convert_record_to_query_neo4j(record: GraphRecord) -> list[str]:
    """
    Builds the queries for a single GraphRecord.

    Parameters:
        record (GraphRecord): The record providing ``nodes`` and ``relationships``.

    Returns:
        list[str]: The node queries followed by the relationship queries.
    """
    # Node queries come first so that the relationship queries, which MATCH
    # on nodes, are listed after the MERGE statements creating those nodes.
    node_queries = _create_node_queries(record.nodes)
    relationship_queries = _create_relationship_queries(record.relationships)
    return [*node_queries, *relationship_queries]
def _escape_cypher_string(value) -> str:
    """Escapes a value for use inside a double-quoted Cypher string literal."""
    # Backslashes are handled first so the ones added for quotes are not doubled.
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def _create_node_queries(nodes: list) -> list[str]:
    """
    Creates one MERGE query per node.

    Parameters:
        nodes (list): Objects exposing ``node_label``, ``uid`` and a
            ``properties`` mapping that contains a ``"smiles"`` entry.

    Returns:
        list[str]: One newline-terminated MERGE query per node, in input order.

    Raises:
        KeyError: If a node's ``properties`` has no ``"smiles"`` entry.
    """
    queries = []
    for node in nodes:
        # SMILES strings use "\" for cis/trans bonds (e.g. F/C=C\F), and Cypher
        # treats "\" as an escape character inside string literals. The value
        # is escaped so the generated query stays valid and stores the exact
        # SMILES string.
        smiles = _escape_cypher_string(node.properties["smiles"])
        queries.append(
            f'MERGE (:{node.node_label} {{uid: "{node.uid}",smiles: "{smiles}"}})\n'
        )
    return queries
def _create_relationship_queries(relationships: list) -> list[str]:
    """
    Creates one MATCH/MATCH/MERGE query per relationship.

    Parameters:
        relationships (list): Objects exposing ``start_node``, ``end_node``
            (each with ``node_label`` and ``uid``) and ``relationship_type``.

    Returns:
        list[str]: One three-line query per relationship, in input order.
    """

    def _as_query(rel) -> str:
        # Both end points are looked up by uid before the edge is merged.
        source, target = rel.start_node, rel.end_node
        return "".join(
            (
                f'MATCH (sn:{source.node_label} {{uid: "{source.uid}"}})\n',
                f'MATCH (en:{target.node_label} {{uid: "{target.uid}"}})\n',
                f"MERGE (sn)-[:{rel.relationship_type}]->(en)\n",
            )
        )

    return [_as_query(rel) for rel in relationships]
def _generate_properties_assignment(properties: list[str]) -> str:
    """
    Builds the comma-separated property assignments for a query.

    Parameters:
        properties (list[str]): The property names to assign.

    Returns:
        str: One ``name: apoc.convert.fromJsonMap(row.properties).name`` entry
        per property, joined by ``", "`` (empty string for no properties).
    """
    return ", ".join(
        f"{name}: apoc.convert.fromJsonMap(row.properties).{name}"
        for name in properties
    )
def _get_dict_keys_from_csv(csv_file_path) -> list:
    """
    Collects the keys used by the dictionaries in a CSV file's "properties" column.

    Each cell of the "properties" column is parsed with ``ast.literal_eval``
    and is expected to hold a dictionary literal. A ``UserWarning`` is issued
    (once) when at least one row does not define every key.

    Parameters:
        csv_file_path: The path of the CSV file, as accepted by ``pandas.read_csv``.

    Returns:
        list: The union of all dictionary keys, sorted so that the result
        (and anything generated from it) is reproducible between runs.

    Raises:
        KeyError: If the CSV file has no "properties" column.
    """
    df = pd.read_csv(csv_file_path)

    # Every cell holds the textual form of a dict; literal_eval only accepts
    # Python literals, so no arbitrary code is evaluated.
    properties_series = df["properties"].apply(ast.literal_eval)

    all_keys = [list(d.keys()) for d in properties_series]
    unique_keys = set().union(*all_keys)

    # Report the first row that lacks some of the keys, then stop looking.
    for keys in all_keys:
        if set(keys) != unique_keys:
            warnings.warn(
                f"Some dictionaries are missing keys: {unique_keys - set(keys)}",
                UserWarning,
                stacklevel=2,
            )
            break

    # A bare list(set) has an arbitrary, run-dependent order; sorting makes
    # the output deterministic. key=str keeps this safe for mixed key types.
    return sorted(unique_keys, key=str)
def _create_neo4j_import_path(directory: Union[str, Path], file_name: str) -> str:
    """
    Builds the file URI of a file located in a given directory, suitable
    for use in a Cypher LOAD CSV command.

    Parameters:
        directory (Union[str, Path]): The directory containing the file.
        file_name (str): The name of the file.

    Returns:
        str: The URI of the resolved, absolute file path.
    """
    absolute_path = Path(directory).joinpath(file_name).resolve()
    return absolute_path.as_uri()
def _generate_files_string(
    folder_path: Union[str, Path, None],
    prefix: Union[str, None],
    items: List[str],
    item_type: str,
) -> str:
    """
    Builds the comma-separated file descriptors for nodes or relationships.

    Parameters:
        folder_path (Union[str, Path, None]): The folder containing the CSV
            files. When falsy, the bare ``file:/<name>`` form is used instead
            of a resolved file URI.
        prefix (Union[str, None]): Optional prefix of the CSV file names; it is
            joined to the upper-cased item with an underscore.
        items (List[str]): The node labels or relationship types.
        item_type (str): The descriptor key, either 'labels' or 'types'.

    Returns:
        str: One ``{fileName:'<uri>', <item_type>:[]}`` descriptor per item,
        joined by ``", "``.
    """

    def _descriptor(item: str) -> str:
        lead = prefix + "_" if prefix else ""
        csv_name = f"{lead}{item.upper()}.csv"
        if folder_path:
            file_uri = _create_neo4j_import_path(folder_path, csv_name)
        else:
            file_uri = f"file:/{csv_name}"
        return f"{{fileName:'{file_uri}', {item_type}:[]}}"

    return ", ".join(_descriptor(item) for item in items)
def _generate_nodes_files_string(
    folder_path: Union[str, Path, None],
    prefix_nodes: Union[str, None],
    nodes_labels: List[str],
) -> str:
    """
    Builds the comma-separated file descriptors for node CSV files.

    Parameters:
        folder_path (Union[str, Path, None]): The folder containing the CSV files.
        prefix_nodes (Union[str, None]): The prefix of the node CSV file names.
        nodes_labels (List[str]): The node labels.

    Returns:
        str: The node file descriptors, each carrying a ``labels`` entry.
    """
    return _generate_files_string(
        folder_path=folder_path,
        prefix=prefix_nodes,
        items=nodes_labels,
        item_type="labels",
    )
def _generate_relationships_files_string(
    folder_path: Union[str, Path, None],
    prefix_relationships: Union[str, None],
    relationships_types: List[str],
) -> str:
    """
    Builds the comma-separated file descriptors for relationship CSV files.

    Parameters:
        folder_path (Union[str, Path, None]): The folder containing the CSV files.
        prefix_relationships (Union[str, None]): The prefix of the relationship
            CSV file names.
        relationships_types (List[str]): The relationship types.

    Returns:
        str: The relationship file descriptors, each carrying a ``types`` entry.
    """
    return _generate_files_string(
        folder_path=folder_path,
        prefix=prefix_relationships,
        items=relationships_types,
        item_type="types",
    )