* Re: [Caml-list] ocaml for the Semantic Web
2009-08-18 10:27 ocaml for the Semantic Web tumenjargal tsagaan
@ 2009-08-19 7:20 ` Sebastien Ferre
0 siblings, 0 replies; 2+ messages in thread
From: Sebastien Ferre @ 2009-08-19 7:20 UTC (permalink / raw)
To: tumenjargal tsagaan, caml-list
[-- Attachment #1: Type: text/plain, Size: 923 bytes --]
Hi,
I am also interested in processing semantic web languages in OCaml,
and I haven't found anything yet.
Some months ago, I wrote a parser for RDF files (using Xml-light).
This cannot be considered as an "API" for RDF, but the hard work of
analysing the RDF-XML is done (source file as attachment).
Sébastien
tumenjargal tsagaan wrote:
> Hi,
>
> (1) Are there any specialized APIs for processing RDF as well as OWL files?
> (2) Is there any API in OCaml similar to the XML parsers from the Java world?
>
> Thank you.
>
> Tumee.
>
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs
[-- Attachment #2: rdf.ml --]
[-- Type: text/plain, Size: 6105 bytes --]
(*
Extracting RDF statements from the XML structure
generated by the library 'xml-light'.
Author: Sébastien Ferré <ferre@irisa.fr>
Creation: 11/02/2009
*)
(** A URI reference, represented as a plain string. *)
type uri = string
(** Identifier of a blank node; the parser in this file fills it from the
    [rdf:nodeID] attribute, or uses the empty string for anonymous nodes. *)
type id = string
(** Language tag; the parser in this file takes it from [xml:lang]. *)
type lang = string
(** Kind of a literal: [Plain] carries a language tag, [Typed] carries the
    datatype URI. *)
type datatype = Plain of lang | Typed of uri
(** A subject or an object of a statement: a resource named by a URI, an XML
    literal (kept as an xml-light value), a textual literal together with its
    kind, or a blank node. *)
type thing = URI of uri | XMLLiteral of Xml.xml | Literal of string * datatype | Blank of id
(** A property is named by its URI (the parser keeps the qualified names found
    in the document as they are written). *)
type property = uri
(** A node together with its outgoing statements.  Each statement is
    [(property, (reification, object))] where [reification] is the optional URI
    naming the statement itself and [object] is again a tree. *)
type tree = Node of thing * (property * (uri option * tree)) list
(** Result of parsing a document: the namespace declarations as
    [(prefix, namespace)] pairs (the default namespace has prefix [""]) and
    the list of top-level trees. *)
type rdf = {
xmlns : (string * string) list;
trees : tree list
}
(* accessors *)
(** [subject t] is the thing stored at the root of tree [t]. *)
let subject = function
  | Node (root, _) -> root

(** [properties t] is the list of statements attached to the root of [t],
    each one as [(property, (reification, object))]. *)
let properties = function
  | Node (_, statements) -> statements
(** [all_objects t p] is the list of object trees of every statement of [t]
    whose property equals [p], in the order in which they are stored. *)
let all_objects (Node (_, ps)) p =
  let with_property = List.filter (fun (p', _) -> p' = p) ps in
  List.map (fun (_, (_, obj)) -> obj) with_property
(** [statement t p o] looks for the first statement of [t] whose property is
    [p] and whose object tree is rooted at [o], and returns the optional
    reification URI recorded for that statement.
    @raise Not_found if [t] has no such statement. *)
let statement (Node (_, ps)) p o =
  let rec search = function
    | [] -> raise Not_found
    | (p', (reification, Node (o', _))) :: rest ->
      if p' = p && o' = o then reification else search rest
  in
  search ps
(* RDF vocabulary *)
(* All terms are given as qualified names using the [rdf:] prefix, which is
   the form in which the parser below compares element and attribute names. *)

(* namespace *)
let namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

(* classes *)
let _XMLLiteral = "rdf:XMLLiteral"
let _Property = "rdf:Property"
let _Statement = "rdf:Statement"
let _Bag = "rdf:Bag"
(* The ordered container class of RDF is rdf:Seq. *)
let _Seq = "rdf:Seq"
(* Historical name kept for backward compatibility.  It used to hold
   "rdf:Set", a term that does not exist in the RDF vocabulary, so it could
   never match a valid document; it is now an alias of [_Seq]. *)
let _Set = _Seq
let _Alt = "rdf:Alt"
let _List = "rdf:List"

(* properties *)
let _type = "rdf:type"
let _first = "rdf:first"
let _rest = "rdf:rest"
let _value = "rdf:value"
let _subject = "rdf:subject"
let _object = "rdf:object"
let _predicate = "rdf:predicate"
(* [_n n] is the container membership property number [n], e.g. "rdf:_3". *)
let _n n = "rdf:_" ^ string_of_int n

(* resources *)
let _nil = "rdf:nil"
(* parsing *)
(** Context inherited while walking down the XML tree: [base] is the value of
    the closest enclosing [xml:base] attribute and [lang] the value of the
    closest enclosing [xml:lang] attribute (both are the empty string when no
    such attribute has been seen, see [default_ctx] and [get_ctx]). *)
type parse_ctx = { base : string; lang : string}
(** Raised by the parsing functions below when the element or attribute at
    hand is not of the kind they expect.
    NOTE: this declaration shadows the standard library exception
    [Failure of string] for the rest of the file; use [Stdlib.Failure] (or
    [failwith]) to reach the standard one. *)
exception Failure
(** Raised by [parse_propertyElt] when the content of a property element does
    not have one of the shapes it handles. *)
exception Error
(** [parse_list p l] applies [p] to the elements of [l] from left to right and
    returns the results in the same order.  An element for which [p] raises
    (any exception) is silently skipped: this is the best-effort policy used
    throughout the parser. *)
let parse_list p l =
  let rec collect acc = function
    | [] -> List.rev acc
    | x :: rest ->
      (* Deliberate catch-all: a failing element is dropped, not reported. *)
      let acc' = try p x :: acc with _ -> acc in
      collect acc' rest
  in
  collect [] l
(** Context used at the root of a document: no base URI, no language. *)
let default_ctx = { base = ""; lang = "" }

(** [get_ctx previous_ctx e] is the context that applies inside element [e]:
    the values of the [xml:base] and [xml:lang] attributes of [e] when they can
    be read, and the corresponding fields of [previous_ctx] otherwise (any
    exception raised by the attribute lookup selects the inherited value). *)
let get_ctx previous_ctx e =
  let attrib_or name inherited =
    try Xml.attrib e name with _ -> inherited
  in
  { base = attrib_or "xml:base" previous_ctx.base;
    lang = attrib_or "xml:lang" previous_ctx.lang }
(** [resolve ctx rel] turns the reference [rel] into a full name: a reference
    that contains a colon is returned unchanged, any other reference is
    appended to [ctx.base]. *)
let resolve ctx rel =
  match String.contains rel ':' with
  | true -> rel
  | false -> ctx.base ^ rel
(** [resolve_tag ctx tag] names the class denoted by an element tag: a tag that
    contains a colon is returned unchanged, any other tag becomes
    [ctx.base], a hash sign, then [tag]. *)
let resolve_tag ctx tag =
  if not (String.contains tag ':')
  then String.concat "#" [ctx.base; tag]
  else tag
(** Core syntax terms of RDF/XML, as qualified names with the [rdf:] prefix. *)
let isCoreSyntaxTerm = function
  | "rdf:RDF" | "rdf:ID" | "rdf:about" | "rdf:parseType"
  | "rdf:resource" | "rdf:nodeID" | "rdf:datatype" -> true
  | _ -> false

(** Syntax terms: the core syntax terms plus [rdf:Description] and [rdf:li]. *)
let isSyntaxTerm x =
  match x with
  | "rdf:Description" | "rdf:li" -> true
  | _ -> isCoreSyntaxTerm x

(** Terms of older RDF versions that are rejected everywhere. *)
let isOldTerm = function
  | "rdf:aboutEach" | "rdf:aboutEachPrefix" | "rdf:bagID" -> true
  | _ -> false

(** Names allowed as the tag of a node element: anything but a core syntax
    term, [rdf:li] or an old term. *)
let isNodeElementURI x =
  not (isCoreSyntaxTerm x) && x <> "rdf:li" && not (isOldTerm x)

(** Names allowed as the tag of a property element: anything but a core syntax
    term, [rdf:Description] or an old term. *)
let isPropertyElementURI x =
  not (isCoreSyntaxTerm x) && x <> "rdf:Description" && not (isOldTerm x)

(** Names allowed for a property attribute: anything but a core syntax term,
    [rdf:Description], [rdf:li] or an old term. *)
let isPropertyAttributeURI x =
  not (isCoreSyntaxTerm x)
  && x <> "rdf:Description"
  && x <> "rdf:li"
  && not (isOldTerm x)
(** [parse_RDF e] parses an [rdf:RDF] root element.  The namespace
    declarations of [e] ([xmlns] and every attribute whose name starts with
    [xmlns] followed by a colon) are returned as [(prefix, value)] pairs, the
    default namespace having the empty prefix; every child of [e] that parses
    as a node element becomes one tree, the other children are skipped.
    @raise Failure if the tag of [e] is not [rdf:RDF]. *)
let rec parse_RDF e =
  if Xml.tag e <> "rdf:RDF" then raise Failure
  else begin
    (* [Some (prefix, value)] for a namespace declaration, [None] otherwise. *)
    let namespace_binding (a, v) =
      let len = String.length a in
      let i = try String.index a ':' with _ -> len in
      if String.sub a 0 i <> "xmlns" then None
      else if i = len then Some ("", v)
      else Some (String.sub a (i + 1) (len - (i + 1)), v)
    in
    let bindings =
      List.fold_right
        (fun attribute acc ->
          match namespace_binding attribute with
          | Some binding -> binding :: acc
          | None -> acc)
        (Xml.attribs e) []
    in
    let ctx = get_ctx default_ctx e in
    let nodes = parse_list (parse_nodeElement ctx) (Xml.children e) in
    { xmlns = bindings; trees = nodes }
  end
(** [parse_nodeElement previous_ctx e] parses the node element [e] into a tree.
    The subject is taken from [rdf:ID] (resolved as a fragment of the base),
    else from [rdf:nodeID] (blank node), else from [rdf:about], else it is the
    anonymous blank node [Blank ""].  The statements are, in this order: an
    [rdf:type] statement for a typed node (tag other than [rdf:Description]),
    the property attributes, then the property elements; attributes and
    children that do not parse are skipped.
    @raise Failure if the tag of [e] is not allowed for a node element. *)
and parse_nodeElement previous_ctx e =
  let tag = Xml.tag e in
  let ctx = get_ctx previous_ctx e in
  if not (isNodeElementURI tag) then raise Failure
  else begin
    (* Any failure of the lookup is read as "attribute absent". *)
    let attribute name = try Some (Xml.attrib e name) with _ -> None in
    let subject =
      match attribute "rdf:ID" with
      | Some local -> URI (resolve ctx ("#" ^ local))
      | None ->
        (match attribute "rdf:nodeID" with
         | Some node_id -> Blank node_id
         | None ->
           (match attribute "rdf:about" with
            | Some about -> URI (resolve ctx about)
            | None -> Blank ""))
    in
    let type_statements =
      if tag = "rdf:Description" then []
      else [(_type, (None, Node (URI (resolve_tag ctx tag), [])))]
    in
    let attribute_statements =
      parse_list (parse_propertyAttr ctx) (Xml.attribs e)
    in
    (* A fresh counter per node element numbers its rdf:li children. *)
    let element_statements =
      parse_list (parse_propertyElt ctx (ref 0)) (Xml.children e)
    in
    Node (subject, type_statements @ attribute_statements @ element_statements)
  end
(** [parse_propertyAttr ctx (a, v)] turns the attribute [a] with value [v] into
    a statement [(a, (None, object))]: the object is the resource [v] (resolved
    against [ctx]) when [a] is [rdf:type], and a plain literal [v] tagged with
    [ctx.lang] otherwise.

    Attributes whose name starts with [xml] (case-insensitive), i.e.
    [xml:lang], [xml:base] and the [xmlns] namespace declarations, are
    reserved XML names: RDF/XML removes them before interpreting attributes,
    so they are rejected here instead of being turned into statements.
    @raise Failure if [a] is a reserved XML name or is not allowed as a
    property attribute. *)
and parse_propertyAttr ctx (a,v) =
  let is_reserved_xml_name name =
    String.length name >= 3
    && (name.[0] = 'x' || name.[0] = 'X')
    && (name.[1] = 'm' || name.[1] = 'M')
    && (name.[2] = 'l' || name.[2] = 'L')
  in
  if is_reserved_xml_name a || not (isPropertyAttributeURI a)
  then raise Failure
  else if a = _type
  then (a, (None, Node (URI (resolve ctx v), [])))
  else (a, (None, Node (Literal (v, Plain ctx.lang), [])))
(** [parse_propertyElt previous_ctx cpt e] parses the property element [e]
    into a statement [(property, (reification, object))].

    - [cpt] is the rdf:li counter of the enclosing node element.  It is
      incremented only when [e] is an [rdf:li] element, which is then renamed
      to the membership property [_n !cpt] ([rdf:_1], [rdf:_2], ...).
    - [reification] is [Some uri] when [e] carries an [rdf:ID] attribute.
    - The object depends on [rdf:parseType] when present ([Resource]: blank
      node with nested property elements; [Collection]: rdf:first / rdf:rest
      list ending with rdf:nil; anything else: XML literal made of the single
      child), and otherwise on the content: a single element is a nested node
      element, a single text node is a literal (typed if [rdf:datatype] is
      given, plain with [ctx.lang] otherwise), and an empty element is either
      an empty plain literal (no attribute, or only [rdf:ID]) or a resource
      described by [rdf:resource] / [rdf:nodeID] and property attributes.
      Attributes whose name starts with [xml] (xml:lang, xml:base, xmlns
      declarations) are ignored when classifying an empty element.

    @raise Failure if the tag of [e] is not allowed for a property element,
    or if a nested node element is rejected by [parse_nodeElement].
    @raise Error if the content has none of the shapes above (for instance an
    XML literal that is not made of exactly one child). *)
and parse_propertyElt previous_ctx cpt e =
  (* Reserved XML names: any name starting with xml, case-insensitively. *)
  let is_reserved_xml_name name =
    String.length name >= 3
    && (name.[0] = 'x' || name.[0] = 'X')
    && (name.[1] = 'm' || name.[1] = 'M')
    && (name.[2] = 'l' || name.[2] = 'L')
  in
  let tag =
    match Xml.tag e with
    | "rdf:li" ->
      (* List expansion: only rdf:li elements consume a number. *)
      incr cpt;
      _n !cpt
    | s -> s
  in
  let ctx = get_ctx previous_ctx e in
  if not (isPropertyElementURI tag) then raise Failure
  else begin
    (* [e] is known to be an element here, so a failed lookup can only mean
       that the attribute is absent. *)
    let attrib_opt name =
      try Some (Xml.attrib e name) with Xml.No_attribute _ -> None
    in
    let reified =
      match attrib_opt "rdf:ID" with
      | Some local -> Some (resolve ctx ("#" ^ local))
      | None -> None
    in
    let children = Xml.children e in
    (* Only the attribute lookup is guarded: an error raised while handling a
       parseType must not make the element be re-read as if it had none. *)
    match attrib_opt "rdf:parseType" with
    | Some "Resource" -> (* parseTypeResourcePropertyElt *)
      let properties = parse_list (parse_propertyElt ctx (ref 0)) children in
      (tag, (reified, Node (Blank "", properties)))
    | Some "Collection" -> (* parseTypeCollectionPropertyElt *)
      let items = parse_list (parse_nodeElement ctx) children in
      let t =
        List.fold_right
          (fun n res ->
            Node (Blank "",
                  [ (_first, (None, n));
                    (_rest, (None, res)) ]))
          items
          (Node (URI _nil, []))
      in
      (tag, (reified, t))
    | Some _ ->
      (* parseTypeLiteralPropertyElt and parseTypeOtherPropertyElt *)
      let xml = (match children with [n] -> n | _ -> raise Error) in
      (tag, (reified, Node (XMLLiteral xml, [])))
    | None ->
      (match children with
       | [Xml.Element _ as n] -> (* resourcePropertyElt *)
         let t = parse_nodeElement ctx n in
         (tag, (reified, t))
       | [Xml.PCData s] -> (* literalPropertyElt *)
         let d =
           match attrib_opt "rdf:datatype" with
           | Some datatype_uri -> Typed datatype_uri
           | None -> Plain ctx.lang
         in
         (tag, (reified, Node (Literal (s, d), [])))
       | [] -> (* emptyPropertyElt *)
         let attribs =
           List.filter
             (fun (a, _) -> not (is_reserved_xml_name a))
             (Xml.attribs e)
         in
         (match attribs with
          | []
          | ["rdf:ID", _] ->
            (tag, (reified, Node (Literal ("", Plain ctx.lang), [])))
          | _ ->
            let obj =
              match attrib_opt "rdf:resource" with
              | Some r -> URI (resolve ctx r)
              | None ->
                (match attrib_opt "rdf:nodeID" with
                 | Some node_id -> Blank node_id
                 | None -> Blank "")
            in
            let properties = parse_list (parse_propertyAttr ctx) attribs in
            (tag, (reified, Node (obj, properties))))
       | _ -> raise Error)
  end
(** [from_xml xml] extracts the RDF content of an XML tree.  The tree is first
    read as an [rdf:RDF] document; if that raises any exception, it is read
    instead as a single node element, with no namespace declarations and the
    default context.  Exceptions raised by this second attempt are not
    caught. *)
let from_xml xml =
  let as_single_node () =
    { xmlns = []; trees = [parse_nodeElement default_ctx xml] }
  in
  try parse_RDF xml with _ -> as_single_node ()
^ permalink raw reply [flat|nested] 2+ messages in thread