This commit is contained in:
Normal file
Normal file
@ -0,0 +1,48 @@
package sax
#cgo pkg-config: libxml-2.0
#include <libxml/tree.h>
#include <libxml/parser.h>
extern void goStartDocument(void*);
extern void goEndDocument(void*);
extern void goStartElement(void*, const xmlChar*, const xmlChar**, int);
extern void goStartElementNoAttr(void*, const xmlChar*);
extern void goEndElement(void*, const xmlChar*);
extern void goCharacters(void*, const xmlChar*, int);
extern void goCharactersRaw(void*, const xmlChar*, int);
// startDocumentCgo is the C-side shim for the "start document" event: it
// forwards the opaque user_data pointer to the Go callback goStartDocument.
void startDocumentCgo(void* user_data) {
  goStartDocument(user_data);
}
// endDocumentCgo is the C-side shim for the "end document" event: it
// forwards the opaque user_data pointer to the Go callback goEndDocument.
void endDocumentCgo(void* user_data) {
  goEndDocument(user_data);
}
// startElementCgo is the C-side shim for the "start element" event. It counts
// the entries of attrs and forwards everything to the Go callback
// goStartElement.
void startElementCgo(void* user_data,
                     const xmlChar* name,
                     const xmlChar** attrs) {
  // The attrs array is terminated with a NULL pointer. To make it usable in
  // Go, we find the length and pass it explicitly to the Go callback.
  int i = 0;
  if (attrs != NULL) {
    while (attrs[i] != NULL) {
      i++;
    }
  }
  goStartElement(user_data, name, attrs, i);
}
// startElementNoAttrCgo is the C-side shim for the "start element" event when
// attributes are not wanted. attrs is accepted but deliberately ignored; only
// the element name is forwarded to the Go callback goStartElementNoAttr.
void startElementNoAttrCgo(void* user_data,
                           const xmlChar* name,
                           const xmlChar** attrs) {
  (void)attrs;
  goStartElementNoAttr(user_data, name);
}
// endElementCgo is the C-side shim for the "end element" event: it forwards
// the element name to the Go callback goEndElement.
void endElementCgo(void* user_data, const xmlChar* name) {
  goEndElement(user_data, name);
}
// charactersCgo is the C-side shim for character data: it forwards the
// (ch, len) pair to the Go callback goCharacters.
void charactersCgo(void* user_data, const xmlChar* ch, int len) {
  goCharacters(user_data, ch, len);
}
// charactersRawCgo is the C-side shim for character data delivered in raw
// form: it forwards the (ch, len) pair to the Go callback goCharactersRaw.
void charactersRawCgo(void* user_data, const xmlChar* ch, int len) {
  goCharactersRaw(user_data, ch, len);
}
import "C"
Normal file
Normal file
@ -0,0 +1,213 @@
package sax
import (
#cgo pkg-config: libxml-2.0
#include <stdlib.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
extern void startDocumentCgo(void*);
extern void endDocumentCgo(void*);
extern void startElementCgo(void*, const xmlChar*, const xmlChar**);
extern void startElementNoAttrCgo(void*, const xmlChar*, const xmlChar**);
extern void endElementCgo(void*, const xmlChar*);
extern void charactersCgo(void*, const xmlChar*, int);
extern void charactersRawCgo(void*, const xmlChar*, int);
// Returns a zero-initialized xmlSAXHandler by value. Since this structure
// contains pointers, take extra care to zero it out before passing it to Go
// code.
static inline xmlSAXHandler newHandlerStruct(void) {
  xmlSAXHandler h = {0};
  return h;
}
// Wrapper callable from Go that returns whatever xmlGetLastError() reports.
// The result is declared const so the wrapper compiles both with libxml2
// headers that return a mutable pointer and with those that return a const
// one; cgo exposes either as *C.xmlError.
// NOTE(review): the previous comment described xmlGetLastError as a C macro -
// confirm against the libxml2 headers in use.
static inline const xmlError* getLastError(void) {
  return xmlGetLastError();
}
import "C"
import ""
// Used to ensure that xmlInitParser is only called once.
var initOnce sync.Once
func init() {
initOnce.Do(func() {
// SaxCallbacks collects callback functions to invoke on SAX events. Only
// populate callbacks you're interested in - callbacks left as nil will not
// be registered with the C layer and may save processing time.
// Some callbacks override others for optimization purposes - check the comments
// for more information.
type SaxCallbacks struct {
	// StartDocument is invoked on the "start document" event.
	StartDocument StartDocumentFunc

	// EndDocument is invoked on the "end document" event.
	EndDocument EndDocumentFunc

	// StartElement is invoked whenever the beginning of a new element is found.
	// name will be the element name, and attrs a slice of attributes where
	// attribute names alternate with values. For example, given the element
	// <elem foo="bar" id="100"> the callback will get name="elem" and
	// attrs=["foo", "bar", "id", "100"].
	StartElement StartElementFunc

	// StartElementNoAttr will override StartElement, if set. When you don't
	// care about the attributes of an element, use this one - it will be faster
	// because it doesn't have to do attribute unpacking, which is expensive.
	StartElementNoAttr StartElementNoAttrFunc

	// EndElement is invoked at the end of parsing an element (after closing tag
	// has been processed), with name being the element name.
	EndElement EndElementFunc

	// Characters is invoked on character data inside elements. contents is the
	// data, as string. Note that this callback may be invoked multiple times
	// within a single tag.
	Characters CharactersFunc

	// CharactersRaw will override Characters, if set. It doesn't translate XML
	// data into a Go string, but leaves it as an opaque pair of (ch, chlen),
	// which you could use UnpackString to convert to a string if needed. This
	// could be a useful optimization if you're only occasionally interested in
	// the contents of character data.
	CharactersRaw CharactersRawFunc
}

// StartDocumentFunc is the type of the SaxCallbacks.StartDocument callback.
type StartDocumentFunc func()

// EndDocumentFunc is the type of the SaxCallbacks.EndDocument callback.
type EndDocumentFunc func()

// StartElementFunc is the type of the SaxCallbacks.StartElement callback. It
// receives the element name and its attributes as alternating names and
// values.
type StartElementFunc func(name string, attrs []string)

// StartElementNoAttrFunc is the type of the SaxCallbacks.StartElementNoAttr
// callback. It receives only the element name.
type StartElementNoAttrFunc func(name string)

// EndElementFunc is the type of the SaxCallbacks.EndElement callback. It
// receives the element name.
type EndElementFunc func(name string)

// CharactersFunc is the type of the SaxCallbacks.Characters callback. It
// receives character data as a Go string.
type CharactersFunc func(contents string)

// CharactersRawFunc is the type of the SaxCallbacks.CharactersRaw callback.
// It receives character data as an opaque (ch, chlen) pair that UnpackString
// can convert to a string.
type CharactersRawFunc func(ch unsafe.Pointer, chlen int)
// UnpackString unpacks the opaque ch, chlen pair (that some callbacks in
// this package may create) into a Go string.
func UnpackString(ch unsafe.Pointer, chlen int) string {
return C.GoStringN((*C.char)(ch),
// ParseFile parses an XML file with the given name using SAX, with cb as
// the callbacks. The file name is required, rather than a reader, because it
// gets passed directly to the C layer.
func ParseFile(filename string, cb SaxCallbacks) error {
var cfilename *C.char = C.CString(filename)
// newHandlerStruct zeroes out all the pointers; we assign only those that
// are passed as non-nil in SaxCallbacks.
SAXhandler := C.newHandlerStruct()
if cb.StartDocument != nil {
SAXhandler.startDocument = C.startDocumentSAXFunc(C.startDocumentCgo)
if cb.EndDocument != nil {
SAXhandler.endDocument = C.endDocumentSAXFunc(C.endDocumentCgo)
if cb.StartElement != nil {
SAXhandler.startElement = C.startElementSAXFunc(C.startElementCgo)
// StartElementNoAttr overrides StartElement
if cb.StartElementNoAttr != nil {
SAXhandler.startElement = C.startElementSAXFunc(C.startElementNoAttrCgo)
if cb.EndElement != nil {
SAXhandler.endElement = C.endElementSAXFunc(C.endElementCgo)
if cb.Characters != nil {
SAXhandler.characters = C.charactersSAXFunc(C.charactersCgo)
// CharactersRaw overrides Characters
if cb.CharactersRaw != nil {
SAXhandler.characters = C.charactersSAXFunc(C.charactersRawCgo)
// Pack the callbacks structure into an opaque unsafe.Pointer which we'll
// pass to C as user_data, and C will pass it back to our Go callbacks.
user_data := pointer.Save(&cb)
defer pointer.Unref(user_data)
rc := C.xmlSAXUserParseFile(&SAXhandler, user_data, cfilename)
if rc != 0 {
xmlErr := C.getLastError()
msg := strings.TrimSpace(C.GoString(xmlErr.message))
return fmt.Errorf("line %v: error: %v", xmlErr.line, msg)
return nil
//export goStartDocument
func goStartDocument(user_data unsafe.Pointer) {
gcb := pointer.Restore(user_data).(*SaxCallbacks)
//export goEndDocument
func goEndDocument(user_data unsafe.Pointer) {
gcb := pointer.Restore(user_data).(*SaxCallbacks)
//export goStartElement
func goStartElement(user_data unsafe.Pointer, name *C.char, attrs **C.char, attrlen {
// Passing attrs to Go is tricky because it's an array of C strings,
// terminated with a NULL pointer. The C callback startElementCgo calculates
// the length of the array and passes it in as attrlen. We still have to
// convert it to a Go slice, by mapping a slice on the underlying storage
// and copying the attributes, one by one. This is all rather expensive, so
// consider using the StartElementNoAttr callback instead, when applicable.
gcb := pointer.Restore(user_data).(*SaxCallbacks)
length := int(attrlen)
var goattrs []string
if length > 0 {
tmpslice := (*[1 << 30]*C.char)(unsafe.Pointer(attrs))[:length:length]
goattrs = make([]string, length)
for i, s := range tmpslice {
goattrs[i] = C.GoString(s)
gcb.StartElement(C.GoString(name), goattrs)
//export goStartElementNoAttr
func goStartElementNoAttr(user_data unsafe.Pointer, name *C.char) {
gcb := pointer.Restore(user_data).(*SaxCallbacks)
//export goEndElement
func goEndElement(user_data unsafe.Pointer, name *C.char) {
gcb := pointer.Restore(user_data).(*SaxCallbacks)
//export goCharacters
func goCharacters(user_data unsafe.Pointer, ch *C.char, chlen {
gcb := pointer.Restore(user_data).(*SaxCallbacks)
gcb.Characters(C.GoStringN(ch, chlen))
//export goCharactersRaw
func goCharactersRaw(user_data unsafe.Pointer, ch *C.char, chlen {
gcb := pointer.Restore(user_data).(*SaxCallbacks)
gcb.CharactersRaw(unsafe.Pointer(ch), int(chlen))
Normal file
Normal file
@ -0,0 +1,9 @@
package sax
import (
func Test1(t *testing.T) {
Normal file
Normal file
@ -0,0 +1,5 @@
module sax
go 1.13
require v0.1.0
Reference in New Issue
Block a user