1
- use std:: { borrow:: Cow , sync:: LazyLock } ;
2
-
3
- use regex:: Regex ;
1
+ use std:: borrow:: Cow ;
4
2
5
3
/// Wrapper around a [`Cow<str>`] guaranteeing that the underlying text satisfies [RFC3987].
6
4
///
@@ -9,12 +7,6 @@ use regex::Regex;
9
7
pub struct Iri < ' a > ( Cow < ' a , str > ) ;
10
8
11
9
impl < ' a > Iri < ' a > {
12
- /// Return a new [`Iri`] if the argument is a valid IRI, otherwise None.
13
- pub fn new ( txt : impl Into < Cow < ' a , str > > ) -> Option < Self > {
14
- let inner = txt. into ( ) ;
15
- IRI_REGEX . is_match ( & inner) . then_some ( Iri ( inner) )
16
- }
17
-
18
10
/// Return a new [`Iri`], assuming the argument is a valid IRI.
19
11
pub fn new_unchecked ( txt : impl Into < Cow < ' a , str > > ) -> Self {
20
12
Iri ( txt. into ( ) )
@@ -86,142 +78,6 @@ impl std::fmt::Display for Iri<'_> {
86
78
}
87
79
}
88
80
89
- pub ( crate ) static IRI_REGEX : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( IRI_REGEX_SRC ) . unwrap ( ) ) ;
90
-
91
- /// Match an absolute IRI reference.
92
- pub static IRI_REGEX_SRC : & str = r"(?x)^
93
- #scheme
94
- ( # CAPTURE scheme
95
- [A-Za-z] [-A-Za-z0-9+.]*
96
- )
97
- :
98
- #ihier_part
99
- (?: #iauthority + ipath_abempty
100
- //
101
- ( # CAPTURE iauthority
102
- (?: # iuserinfo
103
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:]
104
- |
105
- %[0-9a-fA-F]{2}
106
- )*
107
- @
108
- )?
109
- # ihost
110
- (?: # ip_literal
111
- \[
112
- (?: # ipv6address
113
- (?:
114
- (?:[0-9a-fA-F]{1,4}:){6}
115
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
116
- |
117
- ::
118
- (?:[0-9a-fA-F]{1,4}:){5}
119
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
120
- |
121
- (?:[0-9a-fA-F]{1,4})?
122
- ::
123
- (?:[0-9a-fA-F]{1,4}:){4}
124
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
125
- |
126
- (?:(?:[0-9a-fA-F]{1,4}:){0,1}:[0-9a-fA-F]{1,4})?
127
- ::
128
- (?:[0-9a-fA-F]{1,4}:){3}
129
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
130
- |
131
- (?:(?:[0-9a-fA-F]{1,4}:){0,2}:[0-9a-fA-F]{1,4})?
132
- ::
133
- (?:[0-9a-fA-F]{1,4}:){2}
134
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
135
- |
136
- (?:(?:[0-9a-fA-F]{1,4}:){0,3}:[0-9a-fA-F]{1,4})?
137
- ::
138
- [0-9a-fA-F]{1,4}:
139
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
140
- |
141
- (?:(?:[0-9a-fA-F]{1,4}:){0,4}:[0-9a-fA-F]{1,4})?
142
- ::
143
- (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3})
144
- |
145
- (?:(?:[0-9a-fA-F]{1,4}:){0,5}:[0-9a-fA-F]{1,4})?
146
- ::
147
- [0-9a-fA-F]{1,4}
148
- |
149
- (?:(?:[0-9a-fA-F]{1,4}:){0,6}:[0-9a-fA-F]{1,4})?
150
- ::
151
- )
152
- | # ipvfuture
153
- v[0-9a-fA-F]+ \. [-A-Za-z0-9._~!$&'()*+,;=:]+
154
- )
155
- \]
156
- | # ipv4address
157
- (?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5])) (?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}
158
- | # ireg_name
159
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=]
160
- | %[0-9a-fA-F]{2}
161
- )*
162
- )
163
- (?:
164
- :
165
- [0-9]* # port
166
- )?
167
- )
168
- #ipath_abempty
169
- ( # CAPTURE ipath_abempty
170
- (?:
171
- /
172
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@]
173
- | %[0-9a-fA-F]{2}
174
- )*
175
- )*
176
- )
177
- | #ipath_absolute
178
- ( # CAPTURE ipath_absolute
179
- /
180
- (?:
181
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@]
182
- | %[0-9a-fA-F]{2}
183
- )*
184
- (?:
185
- /
186
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@]
187
- | %[0-9a-fA-F]{2}
188
- )*
189
- )*
190
- )?
191
- )
192
- | #ipath_rootless
193
- ( # CAPTURE ipath_rootless
194
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@]
195
- | %[0-9a-fA-F]{2}
196
- )+
197
- (?:
198
- /
199
- (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@]
200
- | %[0-9a-fA-F]{2}
201
- )*
202
- )*
203
- )
204
- )? # optional because of ipath_empty
205
- (?: # ?iquery
206
- \?
207
- ( # CAPTURE iquery
208
- (?:
209
- [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@'\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}/?]
210
- | %[0-9a-fA-F]{2}
211
- )*
212
- )
213
- )?
214
- (?: # #ifragment
215
- \#
216
- ( # CAPTURE ifragment
217
- (?:
218
- [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@/?]
219
- | %[0-9a-fA-F]{2}
220
- )*
221
- )
222
- )?
223
- $" ;
224
-
225
81
#[ cfg( test) ]
226
82
mod test {
227
83
use super :: * ;
@@ -250,63 +106,4 @@ mod test {
250
106
let iri1 = Iri :: new_unchecked ( ex. to_string ( ) ) ;
251
107
assert_eq ! ( iri1. to_string( ) , format!( "<{ex}>" ) ) ;
252
108
}
253
-
254
- #[ test]
255
- fn regex ( ) {
256
- for txt in POSITIVE_IRIS {
257
- assert ! ( IRI_REGEX . is_match( txt) ) ;
258
- }
259
- for txt in NEGATIVE_IRIS {
260
- assert ! ( !IRI_REGEX . is_match( txt) ) ;
261
- }
262
- }
263
-
264
- /// An array of valid IRIs
265
- pub const POSITIVE_IRIS : & [ & str ] = & [
266
- "http:" ,
267
- "http://example.org" ,
268
- "http://127.0.0.1" ,
269
- "http://[::]" ,
270
- "http://%0D" ,
271
- "http://example.org/" ,
272
- "http://éxample.org/" ,
273
- "http://user:[email protected] :1234/" ,
274
- "http://example.org/foo/bar/baz" ,
275
- "http://example.org/foo/bar/" ,
276
- "http://example.org/foo/bar/bàz" ,
277
- "http://example.org/foo/.././/bar" ,
278
- "http://example.org/!$&'()*+,=:@/foo%0D" ,
279
- "http://example.org/?abc" ,
280
- "http://example.org/?!$&'()*+,=:@/?\u{E000} " ,
281
- "http://example.org/#def" ,
282
- "http://example.org/?abc#def" ,
283
- "tag:abc/def" ,
284
- "tag:" ,
285
- "http://example.org/#Andr%C3%A9" ,
286
- "http://example.org/?Andr%C3%A9" ,
287
- ] ;
288
-
289
- /// An array of invalid IRIs.
290
- pub const NEGATIVE_IRIS : & [ & str ] = & [
291
- // valid IRI references that are not IRIs (relative)
292
- "foo" ,
293
- ".." ,
294
- "//example.org" ,
295
- "?" ,
296
- "#" ,
297
- "?#" ,
298
- "?Andr%C3%A9#Andr%C3%A9" ,
299
- // invalid IRI references
300
- "http://[/" ,
301
- "http://a/[" ,
302
- "http://a/]" ,
303
- "http://a/|" ,
304
- "http://a/ " ,
305
- "http://a/\u{E000} " ,
306
- "[" ,
307
- "]" ,
308
- "|" ,
309
- " " ,
310
- "\u{E000} " ,
311
- ] ;
312
109
}
0 commit comments