Skip to content

Commit 9c87439

Browse files
committed Dec 3, 2020
feat: XML::Schema and RelaxNG creation accept optional ParseOptions
I'm trying out a new pattern in which the parsed object carries around the ParseOptions it was created with, which should make some testing a bit easier. I'm also not implementing the "config block" pattern currently used for Documents, because I think the UX is weird and I'm hoping to change everything to use kwargs in a 2.0 release anyway.
1 parent 025e891 commit 9c87439

File tree

9 files changed

+182
-44
lines changed

9 files changed

+182
-44
lines changed
 

‎ext/java/nokogiri/XmlRelaxng.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.jruby.RubyClass;
5757
import org.jruby.anno.JRubyClass;
5858
import org.jruby.runtime.ThreadContext;
59+
import org.jruby.runtime.builtin.IRubyObject;
5960
import org.w3c.dom.Document;
6061
import org.xml.sax.ErrorHandler;
6162
import org.xml.sax.SAXException;
@@ -78,11 +79,17 @@ private void setVerifier(Verifier verifier) {
7879
this.verifier = verifier;
7980
}
8081

81-
static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) {
82+
static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
8283
Ruby runtime = context.getRuntime();
8384
XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz);
85+
86+
if (parseOptions == null) {
87+
parseOptions = defaultParseOptions(context.getRuntime());
88+
}
89+
8490
xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray());
85-
91+
xmlRelaxng.setInstanceVariable("@parse_options", parseOptions);
92+
8693
try {
8794
Schema schema = xmlRelaxng.getSchema(source, context);
8895
xmlRelaxng.setVerifier(schema.newVerifier());

‎ext/java/nokogiri/XmlSchema.java

+34-13
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,16 @@ private void setValidator(Validator validator) {
106106
this.validator = validator;
107107
}
108108

109-
static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) {
109+
static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
110110
Ruby runtime = context.getRuntime();
111111
XmlSchema xmlSchema = (XmlSchema) NokogiriService.XML_SCHEMA_ALLOCATOR.allocate(runtime, klazz);
112+
113+
if (parseOptions == null) {
114+
parseOptions = defaultParseOptions(context.getRuntime());
115+
}
116+
112117
xmlSchema.setInstanceVariable("@errors", runtime.newEmptyArray());
118+
xmlSchema.setInstanceVariable("@parse_options", parseOptions);
113119

114120
try {
115121
SchemaErrorHandler error_handler = new SchemaErrorHandler(context.getRuntime(), (RubyArray)xmlSchema.getInstanceVariable("@errors"));
@@ -121,14 +127,24 @@ static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, So
121127
}
122128
}
123129

130+
protected static IRubyObject defaultParseOptions(Ruby runtime) {
131+
return ((RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions")).getConstant("DEFAULT_SCHEMA");
132+
}
133+
124134
/*
125135
* call-seq:
126136
* from_document(doc)
127137
*
128138
* Create a new Schema from the Nokogiri::XML::Document +doc+
129139
*/
130-
@JRubyMethod(meta=true)
131-
public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject document) {
140+
@JRubyMethod(meta=true, required=1, optional=1)
141+
public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
142+
IRubyObject document = args[0];
143+
IRubyObject parseOptions = null;
144+
if (args.length > 1) {
145+
parseOptions = args[1];
146+
}
147+
132148
XmlDocument doc = ((XmlDocument) ((XmlNode) document).document(context));
133149

134150
RubyArray errors = (RubyArray) doc.getInstanceVariable("@errors");
@@ -144,25 +160,30 @@ public static IRubyObject from_document(ThreadContext context, IRubyObject klazz
144160
source.setSystemId(uri.convertToString().asJavaString());
145161
}
146162

147-
return getSchema(context, (RubyClass)klazz, source);
163+
return getSchema(context, (RubyClass)klazz, source, parseOptions);
148164
}
149165

150-
private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source) {
166+
@JRubyMethod(meta=true, required=1, optional=1)
167+
public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
168+
IRubyObject content = args[0];
169+
IRubyObject parseOptions = null;
170+
if (args.length > 1) {
171+
parseOptions = args[1];
172+
}
173+
String data = content.convertToString().asJavaString();
174+
return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions);
175+
}
176+
177+
private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
151178
String moduleName = klazz.getName();
152179
if ("Nokogiri::XML::Schema".equals(moduleName)) {
153-
return XmlSchema.createSchemaInstance(context, klazz, source);
180+
return XmlSchema.createSchemaInstance(context, klazz, source, parseOptions);
154181
} else if ("Nokogiri::XML::RelaxNG".equals(moduleName)) {
155-
return XmlRelaxng.createSchemaInstance(context, klazz, source);
182+
return XmlRelaxng.createSchemaInstance(context, klazz, source, parseOptions);
156183
}
157184
return context.getRuntime().getNil();
158185
}
159186

160-
@JRubyMethod(meta=true)
161-
public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject content) {
162-
String data = content.convertToString().asJavaString();
163-
return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)));
164-
}
165-
166187
@JRubyMethod(visibility=Visibility.PRIVATE)
167188
public IRubyObject validate_document(ThreadContext context, IRubyObject document) {
168189
return validate_document_or_file(context, (XmlDocument)document);

‎ext/nokogiri/xml_relax_ng.c

+28-11
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
5353
*
5454
* Create a new RelaxNG from the contents of +string+
5555
*/
56-
static VALUE read_memory(VALUE klass, VALUE content)
56+
static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
5757
{
58-
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
59-
(const char *)StringValuePtr(content),
60-
(int)RSTRING_LEN(content)
61-
);
58+
VALUE content;
59+
VALUE parse_options;
60+
xmlRelaxNGParserCtxtPtr ctx;
6261
xmlRelaxNGPtr schema;
63-
VALUE errors = rb_ary_new();
62+
VALUE errors;
6463
VALUE rb_schema;
64+
int scanned_args = 0;
65+
66+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
67+
if (scanned_args == 1) {
68+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
69+
}
6570

71+
ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
72+
73+
errors = rb_ary_new();
6674
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
6775

6876
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
@@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
9098

9199
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
92100
rb_iv_set(rb_schema, "@errors", errors);
101+
rb_iv_set(rb_schema, "@parse_options", parse_options);
93102

94103
return rb_schema;
95104
}
@@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
100109
*
101110
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
102111
*/
103-
static VALUE from_document(VALUE klass, VALUE document)
112+
static VALUE from_document(int argc, VALUE *argv, VALUE klass)
104113
{
114+
VALUE document;
115+
VALUE parse_options;
105116
xmlDocPtr doc;
106117
xmlRelaxNGParserCtxtPtr ctx;
107118
xmlRelaxNGPtr schema;
108119
VALUE errors;
109120
VALUE rb_schema;
121+
int scanned_args = 0;
122+
123+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
110124

111125
Data_Get_Struct(document, xmlDoc, doc);
126+
doc = doc->doc; /* In case someone passes us a node. ugh. */
112127

113-
/* In case someone passes us a node. ugh. */
114-
doc = doc->doc;
128+
if (scanned_args == 1) {
129+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
130+
}
115131

116132
ctx = xmlRelaxNGNewDocParserCtxt(doc);
117133

@@ -143,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document)
143159

144160
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
145161
rb_iv_set(rb_schema, "@errors", errors);
162+
rb_iv_set(rb_schema, "@parse_options", parse_options);
146163

147164
return rb_schema;
148165
}
@@ -156,7 +173,7 @@ void init_xml_relax_ng()
156173

157174
cNokogiriXmlRelaxNG = klass;
158175

159-
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
160-
rb_define_singleton_method(klass, "from_document", from_document, 1);
176+
rb_define_singleton_method(klass, "read_memory", read_memory, -1);
177+
rb_define_singleton_method(klass, "from_document", from_document, -1);
161178
rb_define_private_method(klass, "validate_document", validate_document, 1);
162179
}

‎ext/nokogiri/xml_schema.c

+34-12
Original file line numberDiff line numberDiff line change
@@ -93,23 +93,34 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
9393
*
9494
* Create a new Schema from the contents of +string+
9595
*/
96-
static VALUE read_memory(VALUE klass, VALUE content)
96+
static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
9797
{
98+
VALUE content;
99+
VALUE parse_options;
100+
int parse_options_int;
101+
xmlSchemaParserCtxtPtr ctx;
98102
xmlSchemaPtr schema;
99-
xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
100-
(const char *)StringValuePtr(content),
101-
(int)RSTRING_LEN(content)
102-
);
103+
VALUE errors;
103104
VALUE rb_schema;
104-
VALUE errors = rb_ary_new();
105+
int scanned_args = 0;
106+
107+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
108+
if (scanned_args == 1) {
109+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
110+
}
111+
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
112+
113+
ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
114+
115+
errors = rb_ary_new();
105116
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
106117

107118
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
108119
xmlSchemaSetParserStructuredErrors(
109120
ctx,
110121
Nokogiri_error_array_pusher,
111122
(void *)errors
112-
);
123+
);
113124
#endif
114125

115126
schema = xmlSchemaParse(ctx);
@@ -129,6 +140,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
129140

130141
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
131142
rb_iv_set(rb_schema, "@errors", errors);
143+
rb_iv_set(rb_schema, "@parse_options", parse_options);
132144

133145
return rb_schema;
134146
}
@@ -164,18 +176,27 @@ static int has_blank_nodes_p(VALUE cache)
164176
*
165177
* Create a new Schema from the Nokogiri::XML::Document +doc+
166178
*/
167-
static VALUE from_document(VALUE klass, VALUE document)
179+
static VALUE from_document(int argc, VALUE *argv, VALUE klass)
168180
{
181+
VALUE document;
182+
VALUE parse_options;
183+
int parse_options_int;
169184
xmlDocPtr doc;
170185
xmlSchemaParserCtxtPtr ctx;
171186
xmlSchemaPtr schema;
172187
VALUE errors;
173188
VALUE rb_schema;
189+
int scanned_args = 0;
190+
191+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
174192

175193
Data_Get_Struct(document, xmlDoc, doc);
194+
doc = doc->doc; /* In case someone passes us a node. ugh. */
176195

177-
/* In case someone passes us a node. ugh. */
178-
doc = doc->doc;
196+
if (scanned_args == 1) {
197+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
198+
}
199+
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
179200

180201
if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
181202
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
@@ -211,6 +232,7 @@ static VALUE from_document(VALUE klass, VALUE document)
211232

212233
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
213234
rb_iv_set(rb_schema, "@errors", errors);
235+
rb_iv_set(rb_schema, "@parse_options", parse_options);
214236

215237
return rb_schema;
216238

@@ -226,8 +248,8 @@ void init_xml_schema()
226248

227249
cNokogiriXmlSchema = klass;
228250

229-
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
230-
rb_define_singleton_method(klass, "from_document", from_document, 1);
251+
rb_define_singleton_method(klass, "read_memory", read_memory, -1);
252+
rb_define_singleton_method(klass, "from_document", from_document, -1);
231253

232254
rb_define_private_method(klass, "validate_document", validate_document, 1);
233255
rb_define_private_method(klass, "validate_file", validate_file, 1);

‎lib/nokogiri/xml/parse_options.rb

+2
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ class ParseOptions
7373
DEFAULT_XML = RECOVER | NONET
7474
# the default options used for parsing HTML documents
7575
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76+
# the default options used for parsing XML schemas
77+
DEFAULT_SCHEMA = NONET
7678

7779
attr_accessor :options
7880
def initialize options = STRICT

‎lib/nokogiri/xml/relax_ng.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ class << self
55
###
66
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
77
# See Nokogiri::XML::RelaxNG for an example.
8-
def RelaxNG string_or_io
9-
RelaxNG.new(string_or_io)
8+
def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9+
RelaxNG.new(string_or_io, options)
1010
end
1111
end
1212

‎lib/nokogiri/xml/schema.rb

+6-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ class << self
55
###
66
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
77
# object.
8-
def Schema string_or_io
9-
Schema.new(string_or_io)
8+
def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9+
Schema.new(string_or_io, options)
1010
end
1111
end
1212

@@ -30,12 +30,14 @@ def Schema string_or_io
3030
class Schema
3131
# Errors while parsing the schema file
3232
attr_accessor :errors
33+
# The Nokogiri::XML::ParseOptions used to parse the schema
34+
attr_accessor :parse_options
3335

3436
###
3537
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
3638
# object.
37-
def self.new string_or_io
38-
from_document Nokogiri::XML(string_or_io)
39+
def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
40+
from_document(Nokogiri::XML(string_or_io), options)
3941
end
4042

4143
###

‎test/xml/test_relax_ng.rb

+34
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,40 @@ def test_parse_with_io
2626
assert_equal 0, xsd.errors.length
2727
end
2828

29+
def test_constructor_method_with_parse_options
30+
schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
31+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
32+
33+
schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
34+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
35+
end
36+
37+
def test_new_with_parse_options
38+
schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE))
39+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
40+
41+
schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
42+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
43+
end
44+
45+
def test_from_document_with_parse_options
46+
schema = Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE)))
47+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
48+
49+
schema = Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE)),
50+
Nokogiri::XML::ParseOptions.new.recover)
51+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
52+
end
53+
54+
def test_read_memory_with_parse_options
55+
schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE))
56+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
57+
58+
schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE),
59+
Nokogiri::XML::ParseOptions.new.recover)
60+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
61+
end
62+
2963
def test_parse_with_errors
3064
xml = File.read(ADDRESS_SCHEMA_FILE).sub(/name="/, 'name=')
3165
assert_raises(Nokogiri::XML::SyntaxError) {

‎test/xml/test_schema.rb

+33
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,39 @@ def test_new
109109
assert_instance_of Nokogiri::XML::Schema, xsd
110110
end
111111

112+
def test_schema_method_with_parse_options
113+
schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
114+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
115+
116+
schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
117+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
118+
end
119+
120+
def test_schema_new_with_parse_options
121+
schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE))
122+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
123+
124+
schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
125+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
126+
end
127+
128+
def test_schema_from_document_with_parse_options
129+
schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE)))
130+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
131+
132+
schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE)),
133+
Nokogiri::XML::ParseOptions.new.recover)
134+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
135+
end
136+
137+
def test_schema_read_memory_with_parse_options
138+
schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE))
139+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
140+
141+
schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
142+
assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
143+
end
144+
112145
def test_parse_with_io
113146
xsd = nil
114147
File.open(PO_SCHEMA_FILE, "rb") { |f|

0 commit comments

Comments
 (0)
Please sign in to comment.