Source code for noctis.repository.neo4j.neo4j_functions
import pandas as pd
import warnings
import ast
from noctis.data_architecture.datamodel import GraphRecord
from noctis.data_architecture.datacontainer import DataContainer
from pathlib import Path
from typing import Union, List
[docs]
def _convert_datacontainer_to_query(data_container: DataContainer) -> list[str]:
    """
    Builds the Cypher queries for every record held by a DataContainer.

    Parameters:
        data_container (DataContainer): The container whose ``records`` are converted.

    Returns:
        list[str]: The queries of all records, flattened into one list in record order.
    """
    return [
        statement
        for graph_record in data_container.records
        for statement in _convert_record_to_query_neo4j(graph_record)
    ]
[docs]
def _convert_record_to_query_neo4j(record: GraphRecord) -> list[str]:
    """
    Builds the Cypher queries for a single GraphRecord.

    Parameters:
        record (GraphRecord): The record whose ``nodes`` and ``relationships`` are converted.

    Returns:
        list[str]: The node queries followed by the relationship queries.
    """
    node_statements = _create_node_queries(record.nodes)
    relationship_statements = _create_relationship_queries(record.relationships)
    return [*node_statements, *relationship_statements]
[docs]
def _create_node_queries(nodes: list) -> list[str]:
    """
    Builds one Cypher MERGE statement per node.

    Each statement merges a node with the node's label and two properties,
    ``uid`` and ``smiles``. Both values are embedded as double-quoted Cypher
    string literals, so backslashes and double quotes inside them are escaped
    first. SMILES strings can legitimately contain a backslash (it is a
    bond-direction symbol), which Cypher would otherwise read as the start of
    an escape sequence.

    Parameters:
        nodes (list): Objects exposing ``node_label``, ``uid`` and a
            ``properties`` mapping with a ``"smiles"`` entry.

    Returns:
        list[str]: One newline-terminated MERGE statement per node, in input order.

    Raises:
        KeyError: If ``properties`` is a dict without a ``"smiles"`` key.
    """

    def _as_cypher_text(value) -> str:
        # Escape backslashes first so the ones added for quotes are not doubled.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    return [
        f"MERGE (:{node.node_label} "
        f'{{uid: "{_as_cypher_text(node.uid)}",'
        f'smiles: "{_as_cypher_text(node.properties["smiles"])}"}})\n'
        for node in nodes
    ]
[docs]
def _create_relationship_queries(relationships: list) -> list[str]:
    """
    Builds one Cypher statement per relationship.

    Each statement matches the start and end nodes by label and ``uid`` and
    then merges a directed relationship of the given type between them.

    Parameters:
        relationships (list): Objects exposing ``start_node``, ``end_node``
            (each with ``node_label`` and ``uid``) and ``relationship_type``.

    Returns:
        list[str]: One three-line, newline-terminated statement per relationship.
    """
    statements = []
    for rel in relationships:
        source = rel.start_node
        target = rel.end_node
        statements.append(
            f'MATCH (sn:{source.node_label} {{uid: "{source.uid}"}})\n'
            f'MATCH (en:{target.node_label} {{uid: "{target.uid}"}})\n'
            f"MERGE (sn)-[:{rel.relationship_type}]->(en)\n"
        )
    return statements
[docs]
def _generate_properties_assignment(properties: list[str]) -> str:
    """
    Builds the Cypher property-map entries that read each field from ``row.properties``.

    Parameters:
        properties (list[str]): The property names to assign.

    Returns:
        str: Comma-separated ``name: apoc.convert.fromJsonMap(row.properties).name``
        entries, or an empty string when no names are given.
    """
    return ", ".join(
        f"{name}: apoc.convert.fromJsonMap(row.properties).{name}"
        for name in properties
    )
[docs]
def _get_dict_keys_from_csv(csv_file_path):
    """
    Collects the dictionary keys used in the "properties" column of a CSV file.

    Every cell of the "properties" column is parsed with ``ast.literal_eval``
    and its keys are read. The keys of all rows are merged, and a single
    ``UserWarning`` is emitted if at least one row lacks some of the merged keys.

    Parameters:
        csv_file_path: The CSV file to read, passed straight to ``pandas.read_csv``.

    Returns:
        list: The distinct keys, ordered by first appearance in the file so
        that the result is the same on every run.
    """
    df = pd.read_csv(csv_file_path)
    parsed_rows = df["properties"].apply(ast.literal_eval)
    keys_per_row = [list(row.keys()) for row in parsed_rows]

    # De-duplicate through a dict rather than a set: a dict keeps insertion
    # order, while iterating a set of strings can differ between interpreter runs.
    ordered_keys = list(dict.fromkeys(key for keys in keys_per_row for key in keys))
    unique_keys = set(ordered_keys)

    # Warn once, on the first row that does not carry the full key set.
    for keys in keys_per_row:
        if set(keys) != unique_keys:
            warnings.warn(
                f"Some dictionaries are missing keys: {unique_keys - set(keys)}",
                UserWarning,
            )
            break
    return ordered_keys
[docs]
def _create_neo4j_import_path(directory: Union[str, Path], file_name: str) -> str:
    """
    Turns a directory and a file name into an absolute ``file:`` URI.

    The two parts are joined, the result is resolved to an absolute path and
    then rendered as a URI that can be placed in a Cypher import command.

    Parameters:
        directory (Union[str, Path]): The directory containing the file.
        file_name (str): The name of the file.

    Returns:
        str: The file URI of the resolved path.
    """
    absolute_path = Path(directory).joinpath(file_name).resolve()
    return absolute_path.as_uri()
[docs]
def _generate_files_string(
    folder_path: Union[str, Path, None],
    prefix: Union[str, None],
    items: List[str],
    item_type: str,
) -> str:
    """
    Builds the comma-separated file descriptors for a set of CSV files.

    For every item a CSV name is derived as ``<prefix>_<ITEM>.csv`` (the item
    upper-cased, the prefix part omitted when ``prefix`` is empty or None).
    Without a ``folder_path`` the file is addressed as ``file:/<csv name>``;
    otherwise the URI comes from ``_create_neo4j_import_path``.

    Parameters:
        folder_path (Union[str, Path, None]): The folder containing the CSV files, if any.
        prefix (Union[str, None]): The prefix to be added to the CSV file names.
        items (List[str]): The node labels or relationship types.
        item_type (str): The descriptor key written next to ``fileName``
            ('labels' or 'types'); its value is always an empty list.

    Returns:
        str: The descriptors joined by ", " (an empty string when ``items`` is empty).
    """

    def _descriptor(name: str) -> str:
        lead = prefix + "_" if prefix else ""
        csv_name = f"{lead}{name.upper()}.csv"
        if folder_path:
            file_uri = _create_neo4j_import_path(folder_path, csv_name)
        else:
            file_uri = f"file:/{csv_name}"
        return f"{{fileName:'{file_uri}', {item_type}:[]}}"

    return ", ".join(_descriptor(entry) for entry in items)
[docs]
def _generate_nodes_files_string(
    folder_path: Union[str, Path, None],
    prefix_nodes: Union[str, None],
    nodes_labels: List[str],
) -> str:
    """
    Builds the comma-separated file descriptors for node CSV files.

    Delegates to ``_generate_files_string`` with the descriptor key "labels".

    Parameters:
        folder_path (Union[str, Path, None]): The folder containing the CSV files, if any.
        prefix_nodes (Union[str, None]): The prefix to be added to the node CSV file names.
        nodes_labels (List[str]): The node labels.

    Returns:
        str: The node file descriptors joined by commas.
    """
    return _generate_files_string(
        folder_path=folder_path,
        prefix=prefix_nodes,
        items=nodes_labels,
        item_type="labels",
    )
[docs]
def _generate_relationships_files_string(
    folder_path: Union[str, Path, None],
    prefix_relationships: Union[str, None],
    relationships_types: List[str],
) -> str:
    """
    Builds the comma-separated file descriptors for relationship CSV files.

    Delegates to ``_generate_files_string`` with the descriptor key "types".

    Parameters:
        folder_path (Union[str, Path, None]): The folder containing the CSV files, if any.
        prefix_relationships (Union[str, None]): The prefix to be added to the
            relationship CSV file names.
        relationships_types (List[str]): The relationship types.

    Returns:
        str: The relationship file descriptors joined by commas.
    """
    return _generate_files_string(
        folder_path=folder_path,
        prefix=prefix_relationships,
        items=relationships_types,
        item_type="types",
    )