File size: 5,721 Bytes
69e0484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from rdflib import Graph, RDF, RDFS, OWL, term, URIRef

from rdflib.exceptions import ParserError
from smolagents import tool


@tool
def validate_rdf_syntax(
    file_path: str, file_format: str = "turtle"
) -> tuple[bool, str]:
    """
    Validates the syntax of an RDF file.

    The file is parsed into a throwaway rdflib graph using the specified
    format.  This function never raises: every failure is caught and reported
    through the returned tuple instead.

    Args:
        file_path (str): The path to the RDF file to validate.
        file_format (str, optional): The RDF file format.
            Defaults to "turtle".  Other common formats include "xml" (for RDF/XML),
            "nt" (for N-Triples), and "nq" (for N-Quads).

    Returns:
        tuple: A tuple containing two elements:
            - bool: True if the file was parsed successfully, False otherwise.
            - str: A message describing the validation result.  If the syntax is
              valid, the message is "RDF syntax is valid.".  A syntax problem
              reported by the parser yields "RDF syntax error: <details>"; any
              other failure yields "An unexpected error occurred: <details>".
    """
    g = Graph()
    try:
        g.parse(file_path, format=file_format)
    except (ParserError, SyntaxError) as e:
        # rdflib's Turtle/N3 parser signals malformed input with BadSyntax,
        # which derives from SyntaxError rather than ParserError, so both are
        # treated as syntax errors here.
        return False, f"RDF syntax error: {e}"
    except Exception as e:
        # Anything else (missing file, unknown format plugin, ...) is not a
        # syntax verdict, but is still reported instead of raised.
        return False, f"An unexpected error occurred: {e}"
    return True, "RDF syntax is valid."


@tool
def write_rdf_to_file(filename: str, text: str) -> None:
    """
    Writes the given text to a Turtle file named "<filename>.ttl".

    The file is created or overwritten and encoded as UTF-8.  Failures are
    reported on standard output rather than raised.

    Args:
        filename (str): The name of the file to write to, without extension.
        text (str): The text to write to the file.
    """
    # The ".ttl" suffix is always appended, so report the real target path.
    target = f"{filename}.ttl"
    try:
        # Explicit UTF-8: Turtle documents are UTF-8, and the platform default
        # encoding may not be able to represent every character in `text`.
        with open(target, "w", encoding="utf-8") as file:
            file.write(text)
    except Exception as e:
        print(f"An error occurred while writing to {target}: {e}")
    else:
        print(f"Successfully wrote text to {target}")


@tool
def get_entities_from_kg() -> list:
    """
    Placeholder for listing the entity URIs of a knowledge graph.

    The function currently takes no arguments, loads no graph, and always
    returns an empty list.

    TODO: implement the lookup.  The intended notion of "entity" is a resource
    with an rdf:type linking it to a class, or one explicitly declared as
    owl:NamedIndividual.

    Returns:
        list: Always an empty list in the current implementation.
    """
    return []


def parse_types_graph(g):
    """
    Collect the class-like resources of graph ``g`` and return them sorted.

    A resource counts as a class when it is typed as rdfs:Class or owl:Class,
    or when it appears on either side of an rdfs:subClassOf statement.
    """
    class_nodes = set()

    # Explicit declarations: "<x> rdf:type rdfs:Class" / "<x> rdf:type owl:Class".
    for class_marker in (RDFS.Class, OWL.Class):
        class_nodes.update(
            subject for subject, _, _ in g.triples((None, RDF.type, class_marker))
        )

    # Implicit declarations: both ends of a subclass link are classes, but the
    # superclass is only kept when it is a URI or blank node.
    for child, _, parent in g.triples((None, RDFS.subClassOf, None)):
        class_nodes.add(child)
        if isinstance(parent, (term.URIRef, term.BNode)):
            class_nodes.add(parent)

    ordered = list(class_nodes)
    ordered.sort()
    return ordered


def parse_relations_graph(g):
    """
    Collect the properties of graph ``g`` and return them sorted.

    A resource counts as a property when it is typed as rdf:Property,
    owl:ObjectProperty, owl:DatatypeProperty or owl:AnnotationProperty, or
    when it is used in predicate position by any triple in the graph.
    """
    declared_kinds = (
        RDF.Property,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
    )

    # Resources explicitly typed as one of the property kinds above.
    found = {
        subject
        for kind in declared_kinds
        for subject, _, _ in g.triples((None, RDF.type, kind))
    }

    # Anything used as a predicate is implicitly a property, even when the
    # graph never declares it.
    found.update(
        predicate
        for _, predicate, _ in g.triples((None, None, None))
        if isinstance(predicate, (term.URIRef, term.BNode))
    )

    return sorted(found)


@tool
def get_types_from_ontology() -> list[URIRef]:
    """
    Loads the project ontology and returns a sorted list of its classes.

    The function takes no arguments: the ontology is always read from
    "./sources/cacao.owl" (resolved against the current working directory),
    and no explicit format is given, so rdflib is left to determine the
    serialization itself.  If the file cannot be loaded, the error is printed
    and an empty list is returned instead of raising.

    Returns:
        list: The resources typed as rdfs:Class or owl:Class, plus those taking
            part in rdfs:subClassOf statements, in sorted order.  Empty if
            the ontology could not be loaded.
    """
    g = Graph()

    try:
        g.parse("./sources/cacao.owl", format=None)
    except Exception as e:
        # Best effort: a missing or broken ontology yields "no types" rather
        # than an exception.
        print(f"Error loading ontology: {e}")
        return []
    else:
        return parse_types_graph(g)


@tool
def get_relations_from_ontology() -> list[URIRef]:
    """
    Loads the project ontology and returns a sorted list of its predicates.

    The function takes no arguments: the ontology is always read from
    "./sources/cacao.owl" (resolved against the current working directory),
    and no explicit format is given, so rdflib is left to determine the
    serialization itself.  If the file cannot be loaded, the error is printed
    and an empty list is returned instead of raising.

    Returns:
        list: The resources explicitly typed as properties, plus every term
            used in predicate position in the ontology, in sorted order.
            Empty if the ontology could not be loaded.
    """
    g = Graph()

    try:
        g.parse("./sources/cacao.owl", format=None)
    except Exception as e:
        # Best effort: a missing or broken ontology yields "no relations"
        # rather than an exception.
        print(f"Error loading ontology: {e}")
        return []
    else:
        return parse_relations_graph(g)