# pyatom.py -- PyAtom library module

"""
PyAtom

Module to make it really easy to create Atom syndication feeds.

This module is Copyright (C) 2006 Steve R. Hastings.
Licensed under the Academic Free License version 2.1

You might want to start with the test cases at the end; see how they
work, and then go back and look at the code in the module.

I hope you find this useful!

Steve R. Hastings

Please send your questions or comments to this email address:

pyatom@langri.com
"""
|
22
|
|
23
|
|
24
|
|
25
|
import re
|
26
|
import sys
|
27
|
import time
|
28
|
|
29
|
# Library identification strings.
s_pyatom_name = "PyAtom"
s_pyatom_ver = "0.3.9"
s_pyatom_name_ver = " version ".join((s_pyatom_name, s_pyatom_ver))

# String constants.
# Each of these values is used in more than one place in the module.

# Names used in the XML declaration.
s_version = "version"
s_encoding = "encoding"
s_standalone = "standalone"

# Attribute and tag names.
s_href = "href"
s_lang = "xml:lang"
s_link = "link"
s_term = "term"
s_type = "type"
|
45
|
|
46
|
|
47
|
|
48
|
def set_s_indent(s):
    """
    Configure the indent strings PyAtom uses when formatting its output.

    s is the string that indents one level.  Two module globals are
    (re)bound by this call:

    s_indent      -- s itself
    s_indent_big  -- s repeated 256 times
    """
    global s_indent, s_indent_big
    s_indent, s_indent_big = s, s * 256


# Establish the default at import time: one tab character per level.
set_s_indent("\t")
|
64
|
|
65
|
|
66
|
|
67
|
class TFC(object):
    """
    class TFC: Tag Format Control.
    Controls how tags are converted to strings.

    Arguments to __init__():
        level   Specifies what indent level to start at for output.
                Default 0.
        mode    Specifies how to format the output:
                    mode_terse   -- minimal output (no indenting)
                    mode_normal  -- default
                    mode_verbose -- normal, plus some XML comments

    Normally, if an XML item has no data, nothing is printed, but with
    mode_verbose you may get a comment like "Collection with 0 entries".

    Methods:
        b_print_all()
            Return True if TFC set for full printing.
        b_print_terse()
            Return True if TFC set for terse printing.
        b_print_verbose()
            Return True if TFC set for verbose printing.

        indent_by(incr)
            Return a TFC instance indented incr levels deeper.
        s_indent(extra_indent=0)
            Return an indent string.
    """
    mode_terse, mode_normal, mode_verbose = range(3)

    def __init__(self, level=0, mode=mode_normal):
        """
        Arguments:
            level   Specifies what indent level to start at for output.
                    Default 0.
            mode    One of TFC.mode_terse, TFC.mode_normal (the default)
                    or TFC.mode_verbose.
        """
        self.level = level
        self.mode = mode

    def b_print_all(self):
        """
        Return True if TFC set for full printing.

        Some optional things are usually suppressed, but will be printed
        if the current level is 0.  And everything gets printed when
        mode_verbose is set.
        """
        return self.level == 0 or self.mode == TFC.mode_verbose

    def b_print_terse(self):
        """
        Return True if TFC set for terse printing.
        """
        return self.mode == TFC.mode_terse

    def b_print_verbose(self):
        """
        Return True if TFC set for verbose printing.
        """
        return self.mode == TFC.mode_verbose

    def indent_by(self, incr):
        """
        Return a new TFC, same mode, indented incr levels deeper.

        Pass this to a function that takes a TFC to get a temporary indent.
        """
        return TFC(self.level + incr, self.mode)

    def s_indent(self, extra_indent=0):
        """
        Return an indent string.

        Return a string of white space that indents correctly for the
        current TFC settings.  If specified, extra_indent will be added
        to the current indent level.  Terse mode never indents, and a
        level of zero or less produces an empty string.
        """
        if self.mode == TFC.mode_terse:
            return ""
        level = self.level + extra_indent
        # Inside a method the bare name s_indent is the module global (the
        # one-level indent string), not this method.  Repeating it gives
        # exactly `level` indent units even when the indent string is longer
        # than one character; slicing `level` characters off s_indent_big
        # did not.
        return s_indent * level
|
153
|
|
154
|
|
155
|
|
156
|
# Matches the HTML non-breaking-space entity.
pat_nbsp = re.compile(r'&nbsp;')


def s_entities_to_ws(s):
    """
    Return a copy of s with HTML whitespace entities replaced by a space.

    Currently just gets rid of HTML non-breaking spaces ("&nbsp;").
    A false value (empty string or None) is returned unchanged.
    """
    if not s:
        return s

    return pat_nbsp.sub(" ", s)
|
168
|
|
169
|
def s_normalize_ws(s):
    r"""
    Collapse every run of whitespace in s into a single space.

    Leading and trailing whitespace is dropped entirely.

    >>> s = "and now\n\n\nfor \t something\v completely\r\n different"
    >>> print(s_normalize_ws(s))
    and now for something completely different
    """
    return " ".join(s.split())
|
180
|
|
181
|
|
182
|
def s_escape_html(s):
    """
    Return a copy of string s with HTML codes escaped.

    This is useful when you want HTML tags printed literally, rather than
    interpreted.  The characters "&", "<" and ">" are replaced by their
    entity references.

    >>> print(s_escape_html("<head>"))
    &lt;head&gt;
    >>> print(s_escape_html("&nbsp;"))
    &amp;nbsp;
    """
    # "&" has to go first; otherwise the ampersands introduced by the
    # other two replacements would themselves be escaped again.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    return s
|
198
|
|
199
|
def s_create_atom_id(t, domain_name, uri=""):
    """
    Build an Atom ID (a "tag:" URI) using Mark Pilgrim's algorithm.

    Algorithm taken from here:
    http://diveintomark.org/archives/2004/05/28/howto-atom-id

    t
        time value in a form time.strftime() accepts (a time tuple).
    domain_name
        domain name placed right after "tag:".
    uri
        path part of the ID.  When left as "", a path of the form
        "/weblog/YYYYMMDDHHMMSS" is generated from t.

    Any "#" in the finished ID is replaced by "/".
    """
    if uri == "":
        # timestamp with all fields mushed together, e.g. 20031213083000
        uri = "/weblog/" + time.strftime("%Y%m%d%H%M%S", t)

    # year-month-day, e.g. 2003-12-13
    day = time.strftime("%Y-%m-%d", t)

    atom_id = "tag:%s,%s:%s" % (domain_name, day, uri)
    return atom_id.replace("#", "/")
|
221
|
|
222
|
# Templates for the copyright notice; rebind these globals to localize it.
s_copyright_multiyear = "Copyright %s %d-%d by %s."
s_copyright_oneyear = "Copyright %s %d by %s."


def s_copyright(s_owner, s_csym="(C)", end_year=None, start_year=None):
    """
    Build and return a copyright notice string.

    s_owner
        string with copyright owner's name.
    s_csym
        string with copyright symbol.  (An HTML entity might be good here.)
    end_year
        last year of the copyright.  Default is the current local year.
    start_year
        first year of the copyright.

    Without start_year the notice names a single year (end_year); with
    start_year it names the range start_year-end_year.

    To localize the entire copyright message into another language, change
    the global variables with the copyright template:
        s_copyright_multiyear: for a year range
        s_copyright_oneyear: for a single year
    """
    end_year = end_year or time.localtime().tm_year

    if not start_year:
        return s_copyright_oneyear % (s_csym, end_year, s_owner)

    return s_copyright_multiyear % (s_csym, start_year, end_year, s_owner)
|
252
|
|
253
|
|
254
|
|
255
|
# Here are all of the possible XML items.
#
# Supported by PyAtom:
#     XML Declaration: <?xml ... ?>
#     Comments: <!-- ... -->
#     Elements: <tag_name>...</tag_name>
#
# Minimal support:
#     Markup Declarations: <!KEYWORD ... >
#     Processing Instructions (PIs): <?KEYWORD ... ?>
#
# Not currently supported:
#     INCLUDE and IGNORE directives: <!KEYWORD[ ... ]]>
#     CDATA sections: <![CDATA[ ... ]]>
#
|
270
|
|
271
|
class XMLItem(object):
    """
    All PyAtom classes inherit from this class.  All it does is provide a
    few default methods, and be a root for the inheritance tree.

    An XMLItem has several methods that return an XML tag representation of
    its contents.  Each XMLItem knows how to make a tag for itself.  An
    XMLItem that contains other XMLItems will ask each one to make a tag;
    so asking the top-level XMLItem for a tag will cause the entire tree
    of XMLItems to recursively make tags, and you get a full XML
    representation with tags appropriately nested and indented.

    The methods here read self._parent and self._name; this base class
    does not create them, so subclasses are expected to set both.
    """
    def _s_tag(self, tfc):
        """
        A stub which must always be overridden by child classes.

        Raises NotImplementedError.  (An assert would silently vanish
        when Python runs with -O.)
        """
        raise NotImplementedError("XMLItem instance is too abstract to print.")

    def s_tag(self, level):
        """
        Return the item as a string containing an XML tag declaration.

        The XML tag will be indented.
        Will return an empty string if the item is empty.
        """
        return self._s_tag(TFC(level, TFC.mode_normal))

    def s_tag_verbose(self, level):
        """
        Return the item as a string containing an XML tag declaration.

        The XML tag will be indented.
        May return an XML Comment if the item is empty.
        """
        return self._s_tag(TFC(level, TFC.mode_verbose))

    def s_tag_terse(self, level):
        """
        Return the item as a string containing an XML tag declaration.

        The XML tag will not be indented.
        Will return an empty string if the item is empty.
        """
        return self._s_tag(TFC(level, TFC.mode_terse))

    def __str__(self):
        # Same as s_tag() starting at indent level 0.
        return self.s_tag(0)

    def level(self):
        """
        Return an integer describing what level this tag is.

        The root tag of an XML document is level 0; document-level comments
        or other document-level declarations are also level 0.  Tags nested
        inside the root tag are level 1, tags nested inside those tags are
        level 2, and so on.

        This is currently only used by the s_tree() functions.  When
        printing tags normally, the code that walks the tree keeps track of
        what level is current.
        """
        level = 0
        item = self
        # Walk up the chain of parents, counting only the ancestors that
        # report themselves as real elements.
        while item._parent is not None:
            item = item._parent
            if item.is_element():
                level += 1
        return level

    def s_name(self):
        """
        Return a name for the current item.

        This is self._name when it is set; otherwise a placeholder built
        from the class name.  Used only by the s_tree() functions.
        """
        if self._name:
            return self._name
        return "unnamed_instance_of_" + type(self).__name__

    def s_tree(self):
        """
        Return a verbose tree showing the current tag and its children.

        This is for debugging; it's not valid XML syntax.
        """
        return "%2d) %s\t%s" % (self.level(), self.s_name(), str(self))
|
360
|
|
361
|
|
362
|
|
363
|
class DocItem(XMLItem):
    """
    Marker base class for document-level XML items (items that appear
    above the root element).

    Any item that is allowed at document level inherits from this class.
    It adds no behaviour of its own.
    """
|
370
|
|
371
|
|
372
|
|
373
|
class ElementItem(XMLItem):
    """
    Marker base class for items that may be nested inside an element.

    Any item that is allowed inside another element inherits from this
    class.  It adds no behaviour of its own.
    """
|
380
|
|
381
|
|
382
|
|
383
|
class Comment(DocItem, ElementItem):
    """
    An XML comment.

    Attributes:
        text
            set the text of the comment
    """
    def __init__(self, text=""):
        """
        text: set the text of the comment
        """
        self._parent = None
        self._name = ""
        self.tag_name = "comment"
        self.text = text

    def _s_tag(self, tfc):
        """
        Return the comment formatted as "<!-- ... -->".

        An empty comment yields "" unless tfc asks for everything to be
        printed, in which case an empty comment tag is produced.  Text
        containing a newline is put on its own line(s) between the
        opening and closing markers.
        """
        indent = tfc.s_indent()

        if not self:
            if tfc.b_print_all():
                return indent + "<!-- -->"
            return ""

        if "\n" in self.text:
            return "\n".join([indent + "<!--", self.text, indent + "-->"])

        return "%s%s%s%s" % (indent, "<!-- ", self.text, " -->")

    def __nonzero__(self):
        # Returns True if there is any comment text.
        # Returns False otherwise.
        return bool(self.text)

    def __bool__(self):
        # Python 3 spells the truth-value hook __bool__; defer to
        # __nonzero__ so both interpreter versions agree.
        return self.__nonzero__()

    def is_element(self):
        return True
|
426
|
|
427
|
|
428
|
|
429
|
# REVIEW: can a PI be an ElementItem?
|
430
|
class PI(DocItem):
    """
    XML Processing Instruction (PI).

    Attributes:
        keyword
        text
    """
    def __init__(self):
        self._parent = None
        self._name = ""
        self.keyword = ""
        self.text = ""

    def _s_tag(self, tfc):
        """
        Return the PI formatted as "<?keyword text?>".

        Returns "" when no keyword is set.  Text containing a newline is
        put on its own line(s) between "<?keyword" and "?>".
        """
        if not self:
            return ""

        indent = tfc.s_indent()

        if "\n" in self.text:
            lines = [
                "%s%s%s" % (indent, "<?", self.keyword),
                self.text,
                "%s%s" % (indent, "?>"),
            ]
            return "\n".join(lines)

        return "%s%s%s %s%s" % (indent, "<?", self.keyword, self.text, "?>")

    def __nonzero__(self):
        # Returns True if there is any keyword.
        # Returns False otherwise.
        return bool(self.keyword)

    def __bool__(self):
        # Python 3 spells the truth-value hook __bool__; defer to
        # __nonzero__ so both interpreter versions agree.
        return self.__nonzero__()
|
465
|
|
466
|
|
467
|
|
468
|
# REVIEW: can a MarkupDecl be an ElementItem?
|
469
|
class MarkupDecl(DocItem):
    """
    XML Markup Declaration.

    Attributes:
        keyword
        text
    """
    def __init__(self):
        self._parent = None
        self._name = ""
        self.keyword = ""
        self.text = ""

    def _s_tag(self, tfc):
        """
        Return the declaration formatted as "<!keyword text>".

        Returns "" when no keyword is set.  Text containing a newline is
        put on its own line(s) between "<!keyword" and ">".
        """
        if not self:
            return ""

        indent = tfc.s_indent()

        if "\n" in self.text:
            lines = [
                "%s%s%s" % (indent, "<!", self.keyword),
                self.text,
                "%s%s" % (indent, ">"),
            ]
            return "\n".join(lines)

        return "%s%s%s %s%s" % (indent, "<!", self.keyword, self.text, ">")

    def __nonzero__(self):
        # Returns True if there is any keyword.
        # Returns False otherwise.
        return bool(self.keyword)

    def __bool__(self):
        # Python 3 spells the truth-value hook __bool__; defer to
        # __nonzero__ so both interpreter versions agree.
        return self.__nonzero__()
|
504
|
|
505
|
|
506
|
|
507
|
class CoreElement(ElementItem):
    """
    This is an abstract class.

    All of the XML element classes inherit from this.

    Attributes set by __init__():
        tag_name    name used in the start and end tags
        def_attr    name of the default attribute
        attrs       dictionary of attributes and their values
        attr_names  preferred order in which attributes are printed
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=None):
        """
        tag_name        name used in the start and end tags
        def_attr        name of the default attribute
        def_attr_value  value for the default attribute; it is stored only
                        when both def_attr and def_attr_value are true
        attr_names      list giving the print order of attributes.  Omitted
                        or None means a new empty list, so instances never
                        share one list object.
        """
        # Unlock while the members are created; see __setattr__().
        self.lock = False
        self._parent = None
        self._name = ""
        self.tag_name = tag_name
        self.def_attr = def_attr
        # dictionary of attributes and their values
        self.attrs = {}
        if def_attr and def_attr_value:
            self.attrs[def_attr] = def_attr_value
        if attr_names is None:
            attr_names = []
        self.attr_names = attr_names
        self.lock = True

    def __nonzero__(self):
        # Returns True if any attrs are set or there are any contents.
        # Returns False otherwise.
        return bool(self.attrs) or self.has_contents()

    def __bool__(self):
        # Python 3 spells the truth-value hook __bool__; defer to
        # __nonzero__ so a subclass override of either one is honoured.
        return self.__nonzero__()

    def text_check(self):
        """
        Raise an exception, unless element has text contents.

        Child classes that have text must override this to do nothing.
        """
        raise TypeError("element does not have text contents")

    def nest_check(self):
        """
        Raise an exception, unless element can nest other elements.

        Child classes that can nest must override this to do nothing.
        """
        raise TypeError("element cannot nest other elements")

    def __delattr__(self, name):
        # REVIEW: this should be made to work!
        raise TypeError("cannot delete elements")

    def __getattr__(self, name):
        # Only called when normal lookup fails.
        if name == "lock":
            # If the "lock" hasn't been created yet, we always want it
            # to be False, i.e. we are not locked.
            return False
        raise AttributeError(name)

    def __setattr__(self, name, value):
        """
        Assign a member, enforcing the lock rules.

        0) "self.lock" is a boolean, set to False during __init__() but
           turned True afterwards.  While it is False, new members can be
           added to the instance without any checks.  Once it is True, no
           new member may be added (TypeError), and an assignment to an
           existing member must be of exactly the same type (TypeError
           otherwise).  So for "a.text = string", the .text member has to
           exist already and be a string.

        1) Assignments to _parent are accepted only for an XMLItem or None.
        """
        try:
            lock = self.lock
        except AttributeError:
            lock = False

        members = self.__dict__

        if not lock:
            members[name] = value
            return

        if name not in members:
            # brand-new item on a locked instance
            raise TypeError("element cannot nest other elements")

        if name == "_parent":
            if not (isinstance(value, XMLItem) or value is None):
                raise TypeError("only XMLItem or None is permitted")
            members[name] = value
            return

        # locked item so do checks
        if type(members[name]) is not type(value):
            raise TypeError("value is not the same type")

        members[name] = value

    def has_contents(self):
        return False

    def multiline_contents(self):
        return False

    def s_contents(self, tfc):
        assert False, "CoreElement is an abstract class; it has no contents."

    def _s_start_tag_name_attrs(self, tfc):
        """
        Return a string with the start tag name, and any attributes.

        Wrap this in correct punctuation to get a start tag.

        A single attribute is kept on the same line as the tag name.
        Several attributes are each put on their own line (or separated by
        a single space in terse mode): first those listed in attr_names, in
        that order, then any remaining ones.
        """
        def attr_newline(tfc):
            if tfc.b_print_terse():
                return " "
            return "\n" + tfc.s_indent(2)

        lst = [self.tag_name]

        if len(self.attrs) == 1:
            # just one attr so do on one line
            attr = list(self.attrs)[0]
            lst.append(' %s="%s"' % (attr, self.attrs[attr]))
        elif len(self.attrs) > 1:
            # more than one attr so do a nice nested tag
            # 0) show all attrs in the order of attr_names
            for attr in self.attr_names:
                if attr in self.attrs:
                    s_attr = '%s="%s"' % (attr, self.attrs[attr])
                    lst.append(attr_newline(tfc) + s_attr)
            # 1) any attrs not in attr_names?  list them, too
            for attr in self.attrs:
                if attr not in self.attr_names:
                    s_attr = '%s="%s"' % (attr, self.attrs[attr])
                    lst.append(attr_newline(tfc) + s_attr)

        return "".join(lst)

    def _s_tag(self, tfc):
        """
        Return the complete element: start tag, contents, end tag.

        An empty element yields "" unless tfc asks for everything to be
        printed.  An element without contents is written in the compact
        "<tag/>" form.
        """
        if not self:
            if not tfc.b_print_all():
                return ""

        lst = [tfc.s_indent() + "<" + self._s_start_tag_name_attrs(tfc)]

        if not self.has_contents():
            lst.append("/>")
        else:
            lst.append(">")
            if self.multiline_contents():
                # contents go on their own lines, one level deeper
                s = "\n%s\n" % self.s_contents(tfc.indent_by(1))
                lst.append(s + tfc.s_indent())
            else:
                lst.append(self.s_contents(tfc))
            lst.append("</" + self.tag_name + ">")

        return "".join(lst)

    def s_start_tag(self, tfc):
        return tfc.s_indent() + "<" + self._s_start_tag_name_attrs(tfc) + ">"

    def s_end_tag(self):
        return "</" + self.tag_name + ">"

    def s_compact_tag(self, tfc):
        return tfc.s_indent() + "<" + self._s_start_tag_name_attrs(tfc) + "/>"

    def is_element(self):
        return True
|
687
|
|
688
|
|
689
|
|
690
|
class TextElement(CoreElement):
    """
    An element that cannot have other elements nested inside it.

    Attributes:
        attr
        text
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=None):
        """
        Same arguments as CoreElement.__init__().  attr_names omitted or
        None means a new empty list, so instances never share one list.
        """
        if attr_names is None:
            attr_names = []
        CoreElement.__init__(self, tag_name, def_attr, def_attr_value,
                attr_names)
        # Unlock just long enough to create the .text member.
        self.lock = False
        self.text = ""
        self.lock = True

    def text_check(self):
        # Text contents are allowed here, so nothing to complain about.
        pass

    def has_contents(self):
        return bool(self.text)

    def multiline_contents(self):
        return "\n" in self.text

    def s_contents(self, tfc):
        # The text is returned as-is; tfc is not used.
        return self.text
|
716
|
|
717
|
|
718
|
|
719
|
class Nest(ElementItem):
    """
    A data structure that can store Elements, nested inside it.

    Note: this is not, itself, an Element!  Because it is not an XML
    element, it has no tags.  Its string representation is the
    representations of the elements nested inside it.

    NestElement and XMLDoc inherit from this.

    Nested items are kept, in insertion order, in the .elements list, and
    each one is also reachable as an attribute under the name it was
    assigned to.  Indexing, len() and del by index act on .elements.
    """
    def __init__(self):
        # Unlock while the members are created; see __setattr__().
        self.lock = False
        self._parent = None
        self._name = ""
        self.elements = []
        self.lock = True

    def __len__(self):
        return len(self.elements)

    def __getitem__(self, key):
        return self.elements[key]

    def __setitem__(self, key, value):
        self.elements[key] = value

    def __delitem__(self, key):
        del self.elements[key]

    def _do_setattr(self, name, value):
        """
        Store value under name; an XMLItem is also linked into the tree.

        For an XMLItem, its _parent becomes self, its _name becomes name,
        and it is appended to self.elements.
        """
        if isinstance(value, XMLItem):
            value._parent = self
            value._name = name
            self.elements.append(value)
        self.__dict__[name] = value

    def __setattr__(self, name, value):
        """
        Assign a member, with the checks and conveniences described below.

        0) self.lock is a boolean, set to False initially and then set to
           True at the end of __init__().  While it is False, members are
           added without any checks.  Once it is True, an assignment to an
           existing member has to be of matching type, and a new member
           may be added only if it is an XMLItem (and nest_check() allows
           nesting).

        1) A new XMLItem is properly added to the tree structure by
           _do_setattr().  Assigning an XMLItem over an existing member of
           the same type replaces the old item in self.elements at the
           same position.

        2) As a convenience, assigning a string to a member that is a
           CoreElement stores the string in that element's .text (after
           its text_check()), which allows "e.title = string".  Likewise a
           float, or a valid timestamp string, assigned to a Timestamp
           member is stored in its .time.

        3) Assignments to _parent (and root_element) are accepted only for
           an XMLItem or None.

        Raises TypeError for a disallowed assignment, and ValueError for an
        invalid timestamp string.
        """
        try:
            lock = self.lock
        except AttributeError:
            lock = False

        if not lock:
            self._do_setattr(name, value)
            return

        members = self.__dict__

        if name not in members:
            # brand-new item on a locked instance
            self.nest_check()
            if not isinstance(value, XMLItem):
                raise TypeError("only XMLItem is permitted")
            self._do_setattr(name, value)
            return

        if name == "_parent" or name == "root_element":
            if not (isinstance(value, XMLItem) or value is None):
                raise TypeError("only XMLItem or None is permitted")
            members[name] = value
            return

        if name == "_name" and isinstance(value, str):
            members[name] = value
            return

        current = members[name]

        # for Timestamp elements, allow this: element = time
        # (where "time" is a float value)
        # Also allow valid timestamp strings.
        if isinstance(current, Timestamp):
            if isinstance(value, float):
                current.time = value
                return
            elif isinstance(value, str):
                t = utc_time_from_s_timestamp(value)
                if not t:
                    raise ValueError("value must be a valid timestamp string")
                current.time = t
                return

        # Allow string assignment to go to the .text attribute, for
        # elements that allow it.  text_check() raises an error if it's
        # not allowed.
        if isinstance(current, CoreElement) and isinstance(value, str):
            current.text_check()
            current.text = value
            return

        # locked item so do checks
        if type(current) is not type(value):
            raise TypeError("value is not the same type")

        if isinstance(value, XMLItem) and value is not current:
            # Replacing a nested item: keep self.elements in step with the
            # attribute, otherwise the old item would still be the one
            # that gets printed.
            value._parent = self
            value._name = name
            for i, element in enumerate(self.elements):
                if element is current:
                    self.elements[i] = value
                    break
            else:
                self.elements.append(value)

        members[name] = value

    def __delattr__(self, name):
        """
        Remove the nested XMLItem stored under name.

        The item is taken out of self.elements and the attribute is
        deleted.  Raises TypeError when name is _parent or does not refer
        to an XMLItem.
        """
        # This won't be used often, if ever, but if anyone tries it, it
        # should work.
        o = self.__dict__.get(name)
        if name == "_parent" or not isinstance(o, XMLItem):
            # REVIEW: what error should this raise?
            raise TypeError("cannot delete that item")

        # Remove by identity; the item may already be gone from the list
        # if it was deleted by index earlier.
        for i, element in enumerate(self.elements):
            if element is o:
                del self.elements[i]
                break
        del self.__dict__[name]

    def nest_check(self):
        # Nesting is always allowed here.
        pass

    def is_element(self):
        # a Nest is not really an element
        return False

    def has_contents(self):
        for element in self.elements:
            if element:
                return True
        # empty iff all of the elements were empty
        return False

    def __nonzero__(self):
        return self.has_contents()

    def __bool__(self):
        # Python 3 spells the truth-value hook __bool__ (and would
        # otherwise fall back to __len__); defer to __nonzero__ so a
        # subclass override of either one is honoured.
        return self.__nonzero__()

    def multiline_contents(self):
        # if there are any contents, we want multiline for nested tags
        return self.has_contents()

    def s_contents(self, tfc):
        """
        Return the tags of all nested elements, one per line.

        Elements whose tag string is empty are left out; with no elements
        at all the result is "".
        """
        lst = []
        for element in self.elements:
            s = element._s_tag(tfc)
            if s:
                lst.append(s)
        return "\n".join(lst)

    def s_tree(self):
        """
        Return a debugging tree: a line for self, then each child's tree.
        """
        tup = (self.level(), self.s_name(), self.__class__.__name__)
        lst = ["%2d) %s (instance of %s)" % tup]
        for element in self.elements:
            lst.append(element.s_tree())
        return "\n".join(lst)

    def _s_tag(self, tfc):
        # A Nest has no tag of its own; it is just its contents.
        return self.s_contents(tfc)
|
903
|
|
904
|
|
905
|
|
906
|
|
907
|
class NestElement(Nest, CoreElement):
    """
    An element that can have other elements nested inside it.

    Attributes:
        attr
        elements: a list of other elements nested inside this one.
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=None):
        """
        Same arguments as CoreElement.__init__().  attr_names omitted or
        None means a new empty list, so instances never share one list.
        """
        if attr_names is None:
            attr_names = []
        CoreElement.__init__(self, tag_name, def_attr, def_attr_value,
                attr_names)
        # Unlock just long enough to create the .elements member.
        self.lock = False
        self.elements = []
        self.lock = True

    def is_element(self):
        return True

    def __nonzero__(self):
        # Use the element rule (any attrs or any contents), not Nest's.
        return CoreElement.__nonzero__(self)

    def __bool__(self):
        # Python 3 spells the truth-value hook __bool__; defer to
        # __nonzero__ so both interpreter versions agree.
        return self.__nonzero__()

    def _s_tag(self, tfc):
        # Print as a real element with tags, not as a bare Nest.
        return CoreElement._s_tag(self, tfc)
|
930
|
|
931
|
|
932
|
|
933
|
class Element(NestElement, TextElement):
    """
    A class to represent an arbitrary XML tag.  Can either have other XML
    elements nested inside it, or else can have a text string value, but
    never both at the same time.

    This is intended for user-defined XML tags.  The user can just use
    "Element" for all custom tags.

    PyAtom doesn't use this; PyAtom uses TextElement for tags with a text
    string value, and NestElement for tags that nest other elements.  Users
    can do the same, or can just use Element, as they like.

    Attributes:
        attr
        elements: a list of other elements nested inside, if any
        text: a text string value, if any

    Note: nest_check() raises TypeError when the element already has text,
    and text_check() raises TypeError when it already has nested elements.
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=None):
        """
        Same arguments as CoreElement.__init__().  attr_names omitted or
        None means a new empty list, so instances never share one list.
        """
        if attr_names is None:
            attr_names = []
        NestElement.__init__(self, tag_name, def_attr, def_attr_value,
                attr_names)
        # Unlock just long enough to create the .text member.
        self.lock = False
        self.text = ""
        self.lock = True

    def nest_check(self):
        if self.text:
            raise TypeError("Element has text contents so cannot nest")

    def text_check(self):
        if len(self.elements) > 0:
            raise TypeError("Element has nested elements so cannot assign text")

    def has_contents(self):
        return NestElement.has_contents(self) or TextElement.has_contents(self)

    def multiline_contents(self):
        return NestElement.has_contents(self) or "\n" in self.text

    def s_contents(self, tfc):
        """
        Return the nested elements if there are any, else the text,
        else "".
        """
        if len(self.elements) > 0:
            return NestElement.s_contents(self, tfc)
        if self.text:
            return TextElement.s_contents(self, tfc)
        return ""

    def s_tree(self):
        """
        Return a debugging tree for this element.

        With nested elements: a line for self followed by each child's
        tree.  With text: the one-line form from XMLItem.s_tree().
        Otherwise a line marking the element as empty.
        """
        if len(self.elements) > 0:
            tup = (self.level(), self.s_name(), self.__class__.__name__)
            lst = ["%2d) %s (instance of %s)" % tup]
            for element in self.elements:
                lst.append(element.s_tree())
            return "\n".join(lst)

        if self.text:
            return XMLItem.s_tree(self)

        return "%2d) %s %s" % (self.level(), self.s_name(), "empty Element...")
|
1007
|
|
1008
|
|
1009
|
|
1010
|
class Collection(XMLItem):
    """
    A Collection contains 0 or more Elements, but isn't an XML element.
    Use where a run of 0 or more Elements of the same type is legal.

    When you init your Collection, you specify what class of Element it will
    contain.  Attempts to append an Element of a different class will raise
    an exception.  Note, however, that the various Element classes all
    inherit from base classes, and you can specify a class from higher up in
    the inheritance tree.  You could, if you wanted, make a Collection
    containing "XMLItem" and then any item defined in PyAtom would be legal
    in that collection.  (See XMLDoc, which contains two collections of
    DocItem.)

    Attributes:
        contains:   the class of element this Collection will contain
        elements:   a list of other elements nested inside, if any

    Note: The string representation of a Collection is just the string
    representations of the elements inside it.  However, a verbose string
    representation may have an XML comment like this:

    <!-- Collection of <class> with <n> elements -->

    where <n> is the number of elements in the Collection and <class> is the
    name of the class in this Collection.
    """
    def __init__(self, element_class):
        """Create an empty Collection that accepts only element_class items."""
        self.lock = False
        self._parent = None
        self._name = ""
        self.elements = []
        self.contains = element_class
        self.lock = True

    def __len__(self):
        return len(self.elements)

    def __getitem__(self, key):
        return self.elements[key]

    def __setitem__(self, key, value):
        """
        Replace the item at key with value.

        Raises TypeError if value is not an instance of self.contains.
        """
        if not isinstance(value, self.contains):
            raise TypeError("object is the wrong type for this collection")
        self.elements[key] = value
        # Keep this consistent with append(): an item stored in the
        # collection has the collection as its parent.  Done after the
        # store so a bad key does not leave value half-adopted.
        value._parent = self

    def __delitem__(self, key):
        del self.elements[key]

    def __nonzero__(self):
        # there are no attrs so if any element is nonzero, collection is too
        for element in self.elements:
            if element:
                return True
        return False

    def is_element(self):
        # A Collection is not really an Element
        return False

    def s_coll(self):
        """Return "collection of <class> with <n> element(s)"."""
        name = self.contains.__name__
        n = len(self.elements)
        if n == 1:
            el = "element"
        else:
            el = "elements"
        return "collection of %s with %d %s" % (name, n, el)

    def append(self, element):
        """
        Add element to the end of the collection and become its parent.

        If element is not an instance of self.contains, a diagnostic line
        is written to sys.stderr and TypeError is raised.
        """
        if not isinstance(element, self.contains):
            sys.stderr.write(
                "Error: attempted to insert %s into collection of %s\n"
                % (type(element).__name__, self.contains.__name__))
            raise TypeError("object is the wrong type for this collection")
        element._parent = self
        self.elements.append(element)

    def _s_tag(self, tfc):
        """
        Return the string form of every contained element, one per line.

        A collection has no start or end tag of its own.  An empty
        collection yields "" unless tfc.b_print_all() is true.  When
        tfc.b_print_verbose() is true, an XML comment describing the
        collection comes first and the elements are indented one level
        beneath it.
        """
        if not self.elements:
            if not tfc.b_print_all():
                return ""

        lst = []

        if tfc.b_print_verbose():
            s = "%s%s%s%s" % (tfc.s_indent(), "<!-- ", self.s_coll(), " -->")
            lst.append(s)
            tfc = tfc.indent_by(1)

        for element in self.elements:
            s = element._s_tag(tfc)
            if s:
                lst.append(s)

        return "\n".join(lst)

    def s_tree(self):
        """Return a header line for the collection plus each item's s_tree()."""
        level = self.level()
        lst = ["%2d) %s %s" % (level, self.s_name(), self.s_coll())]
        for element in self.elements:
            lst.append(element.s_tree())
        return "\n".join(lst)
|
1116
|
|
1117
|
|
1118
|
|
1119
|
class XMLDeclaration(XMLItem):
    """
    The "<?xml ... ?>" declaration item.

    Attributes:
        attrs:      dict of declaration attributes; starts with version
                    "1.0" and encoding "utf-8"
        attr_names: the order in which known attributes are written
    """
    # REVIEW: should this print multi-line for multiple attrs?
    def __init__(self):
        self._parent = None
        self._name = ""
        self.attrs = {s_version: "1.0", s_encoding: "utf-8"}
        self.attr_names = [s_version, s_encoding, s_standalone]

    def _s_tag(self, tfc):
        """Return the declaration as a single line at tfc's indent."""
        # An XMLDeclaration() instance is never empty, so always prints.
        pieces = ["%s%s" % (tfc.s_indent(), "<?xml")]

        # 0) attrs that are listed in attr_names come first, in that order
        ordered = [attr for attr in self.attr_names if attr in self.attrs]
        # 1) then any attrs that attr_names does not mention
        ordered += [attr for attr in self.attrs
                if attr not in self.attr_names]

        for attr in ordered:
            pieces.append(' %s="%s"' % (attr, self.attrs[attr]))

        pieces.append("?>")
        return "".join(pieces)

    def __nonzero__(self):
        # Returns True because the XML Declaration is never empty.
        return True

    def is_element(self):
        return True
|
1155
|
|
1156
|
|
1157
|
|
1158
|
class XMLDoc(Nest):
    """
    A data structure to represent an XML Document.  It will have the
    following structure:

    the XML Declaration item
    0 or more document-level XML items
    exactly one XML item (the "root tag")
    0 or more document-level XML items

    document level XML items are: Comment, PI, MarkupDecl


    Attributes:
        xml_decl:       the XMLDeclaration item
        docitems_above: a collection of DocItem (items above root_element)
        root_element:   the XML tag containing your data
        docitems_below: a collection of DocItem (items below root_element)

    Note: usually the root_element has lots of other XML items nested inside
    it!
    """
    def __init__(self, root_element=None):
        """
        Build the document skeleton.

        root_element
            The root item of the document.  When omitted (None), a Comment
            placeholder is installed until a real root is assigned.
        """
        Nest.__init__(self)

        self._name = "XMLDoc"

        self.xml_decl = XMLDeclaration()
        self.docitems_above = Collection(DocItem)

        # Compare against None explicitly: an element that is merely
        # empty may test false, and must not be swapped for the
        # placeholder comment.
        if root_element is None:
            root_element = Comment("no root element yet")
        self.root_element = root_element

        self.docitems_below = Collection(DocItem)

    def __setattr__(self, name, value):
        """
        Assign an attribute, giving root_element special treatment.

        root_element may be reassigned at any time, but only to an
        ElementItem (TypeError otherwise).  The new value always occupies
        the same slot of self.elements that the first root_element took.
        Every other name goes to Nest.__setattr__().
        """
        if name != "root_element":
            # for all other, fall through to inherited behavior
            Nest.__setattr__(self, name, value)
            return

        if not isinstance(value, ElementItem):
            raise TypeError("only ElementItem is permitted")

        # NOTE(review): .lock is presumably honoured by Nest.__setattr__;
        # it is dropped here and always restored, even if the slot
        # bookkeeping below raises.
        self.lock = False
        try:
            if "i_root_element" in self.__dict__:
                # Assign new root_element over old one in elements[]
                assert self.elements[self.i_root_element] == self.root_element
                self.elements[self.i_root_element] = value
            else:
                # This is the first time root_element was ever set.
                self.i_root_element = len(self.elements)
                self.elements.append(value)

            value._parent = self
            value._name = name
            self.__dict__[name] = value
        finally:
            self.lock = True

    def Validate(self):
        """Assert the document has no parent; return True."""
        # XMLDoc never has parent.  Never change this!
        assert self._parent is None
        return True
|
1225
|
|
1226
|
|
1227
|
|
1228
|
def local_time_from_utc_time(t):
    """Return t shifted from UTC to local time by subtracting time.timezone."""
    seconds_west = time.timezone
    return t - seconds_west
|
1230
|
|
1231
|
def utc_time_from_local_time(t):
    """Return t shifted from local time to UTC by adding time.timezone."""
    seconds_west = time.timezone
    return t + seconds_west
|
1233
|
|
1234
|
def local_time():
    """Return the current time.time() value minus time.timezone."""
    now = time.time()
    return now - time.timezone
|
1236
|
|
1237
|
def utc_time():
    """Return the current time as reported by time.time()."""
    now = time.time()
    return now
|
1239
|
|
1240
|
|
1241
|
class TimeSeq(object):
    """
    Hands out a run of timestamps, each one second after the last.

    Atom feed validators complain when several timestamps carry the same
    value; drawing them from one TimeSeq keeps them all distinct.
    """
    def __init__(self, init_time=0):
        """Start at init_time, or at local_time() when init_time is 0."""
        if init_time != 0:
            self.time = float(init_time)
        else:
            self.time = local_time()

    def next(self):
        """Return the current timestamp, then advance by one second."""
        current = self.time
        self.time = current + 1.0
        return current
|
1258
|
|
1259
|
# strftime() format for the date-and-time portion of a timestamp.
format_RFC3339 = "%Y-%m-%dT%H:%M:%S"

# Raw strings keep the regex escapes (\d) out of the hands of the string
# literal parser.
pat_RFC3339 = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(.*)")
pat_time_offset = re.compile(r"([+-])(\d\d):(\d\d)")


def parse_time_offset(s):
    """
    Convert an offset string into a signed number of seconds.

    s
        "", "Z" or "z" (all meaning zero), or a string containing an
        offset of the form +HH:MM or -HH:MM.  Surrounding whitespace is
        ignored.

    Returns the offset in seconds; negative for a "-" offset.
    Raises ValueError if no offset can be found in s.
    """
    s = s.strip()

    if s in ("", "Z", "z"):
        return 0

    m = pat_time_offset.search(s)
    if m is None:
        raise ValueError("not a valid time offset: %r" % (s,))

    offset = int(m.group(2)) * 3600 + int(m.group(3)) * 60
    if m.group(1) == "-":
        offset = -offset
    return offset


def s_timestamp(utc_time, time_offset="Z"):
    """
    Format a time and offset into a string.

    utc_time
        a floating-point value, time in the UTC time zone
    time_offset
        a string specifying an offset from UTC.  Examples:
            z or Z -- offset is 0 ("Zulu" time, UTC, aka GMT)
            -08:00 -- 8 hours earlier than UTC (Pacific time zone)
            "" -- empty string is technically not legal, but may work

    Returns "" when utc_time is zero/empty or cannot be converted.
    Raises ValueError when time_offset is not a recognizable offset.

    Notes:
        Returned string complies with RFC3339; uses ISO8601 date format.
        Example: 2003-12-13T18:30:02Z
        Example: 2003-12-13T18:30:02+02:00
    """
    if not utc_time:
        return ""

    # Shift to the wall-clock time of the requested zone.
    utc_time += parse_time_offset(time_offset)

    # NOTE(review): time.localtime() is paired with time.mktime() in
    # utc_time_from_s_timestamp(), so the two round-trip, but the fields
    # produced here follow the process's own time zone -- confirm that
    # this is intended rather than time.gmtime().
    try:
        s = time.strftime(format_RFC3339, time.localtime(utc_time))
    except (ValueError, OverflowError, OSError):
        # value is outside the range the platform can convert
        return ""

    return s + time_offset


def utc_time_from_s_timestamp(s_date_time_stamp):
    """
    Parse a timestamp string back into a time value.

    Accepts RFC3339-compatible times that use ISO8601 date format:
        2003-12-13T18:30:02Z
        2003-12-13T18:30:02+02:00
    Leaving off the suffix is technically not legal, but allowed.

    The offset is subtracted from the parsed wall-clock time, undoing the
    addition made by s_timestamp(), so that
    utc_time_from_s_timestamp(s_timestamp(t, offset)) gives back t.

    Returns 0.0 if the string cannot be parsed.
    """
    s_date_time_stamp = s_date_time_stamp.strip()

    m = pat_RFC3339.search(s_date_time_stamp)
    if m is None:
        return 0.0

    # year, month, day, hour, minute, second; the three trailing -1
    # values leave weekday, yearday and DST for mktime() to work out.
    fields = tuple([int(m.group(i)) for i in range(1, 7)])
    tup = fields + (-1, -1, -1)

    try:
        t = time.mktime(tup)
        t -= parse_time_offset(m.group(7))
    except (ValueError, OverflowError):
        return 0.0

    return t
|
1339
|
|
1340
|
|
1341
|
def s_time_offset():
    """
    Return a string with local offset from UTC in RFC3339 format.

    The result has the form +HH:MM or -HH:MM and is derived from
    time.timezone.  A zero offset is written "+00:00"; RFC 3339 gives
    "-00:00" a different meaning (local offset unknown).
    """
    # time.timezone counts seconds WEST of UTC; the RFC3339 offset is
    # positive EAST of UTC, so the sign is flipped.
    offset = -(time.timezone)

    if offset >= 0:
        sign = "+"
    else:
        sign = "-"
    offset = abs(offset)

    offset_hour = offset // (60 * 60)
    offset_min = (offset // 60) % 60
    return "%s%02d:%02d" % (sign, offset_hour, offset_min)
|
1361
|
|
1362
|
# Local offset from UTC as a string, computed once when the module loads.
s_offset_local = s_time_offset()

# Offset handed to each new Timestamp; starts out as the local offset and
# can be replaced with set_default_time_offset().
s_offset_default = s_offset_local
|
1365
|
|
1366
|
def set_default_time_offset(s):
    """
    Replace the module-wide default offset string with s.

    Only the global s_offset_default is rebound; nothing is validated.
    """
    global s_offset_default
    s_offset_default = s
|
1369
|
|
1370
|
|
1371
|
class Timestamp(CoreElement):
    """
    An element whose contents are a formatted timestamp.

    Attributes:
        time:        the time value; 0 means "no contents"
        time_offset: offset string used when formatting; initialized from
                     the module default s_offset_default
        text:        virtual attribute.  Reading it formats .time with
                     s_timestamp(); assigning a timestamp string parses it
                     and stores the result in .time.
    """
    def __init__(self, tag_name, time=0.0):
        CoreElement.__init__(self, tag_name, None, None)
        # NOTE(review): the lock is dropped while the two attributes are
        # created; presumably CoreElement.__setattr__ checks it -- confirm.
        self.lock = False
        self.time = time
        self.time_offset = s_offset_default
        self.lock = True

    def __delattr__(self, name):
        # The original spelled this "__delattr_", which does not exist on
        # CoreElement, so every attribute deletion failed.
        CoreElement.__delattr__(self, name)

    def __getattr__(self, name):
        """
        Supply the virtual .text attribute.

        Called only when normal attribute lookup fails.  Any name other
        than "text" is passed to CoreElement.__getattr__ when the base
        class defines one; otherwise AttributeError is raised.
        """
        if name == "text":
            return s_timestamp(self.time, self.time_offset)
        # The original called the misspelled "CoreElement.__getattr_".
        inherited = getattr(CoreElement, "__getattr__", None)
        if inherited is not None:
            return inherited(self, name)
        raise AttributeError(
            "%s instance has no attribute %r" % (type(self).__name__, name))

    def __setattr__(self, name, value):
        """
        Assign an attribute; assigning .text parses a timestamp string.

        Raises TypeError if .text is given a non-string, and ValueError
        if the string does not parse to a non-zero time.
        """
        if name == "text":
            if not isinstance(value, str):
                raise TypeError("can only assign a string to .text")
            t = utc_time_from_s_timestamp(value)
            if not t:
                raise ValueError("value must be a valid timestamp string")
            self.time = t
            return
        CoreElement.__setattr__(self, name, value)

    def has_contents(self):
        return self.time != 0

    def multiline_contents(self):
        return False

    def s_contents(self, tfc):
        return s_timestamp(self.time, self.time_offset)

    def update(self):
        """Set .time to local_time() and return self."""
        self.time = local_time()
        return self
|
1411
|
|
1412
|
|
1413
|
|
1414
|
|
1415
|
# Below are all the classes to implement Atom using the above tools.
|
1416
|
|
1417
|
|
1418
|
|
1419
|
class AtomText(TextElement):
    """A TextElement whose only named attribute is "type"."""
    def __init__(self, tag_name):
        # legal values of type: "text", "html", "xhtml"
        TextElement.__init__(self, tag_name, None, None, [s_type])
|
1424
|
|
1425
|
class Title(AtomText):
    """AtomText element with tag name "title"."""
    def __init__(self, text=""):
        AtomText.__init__(self, "title")
        self.text = text
|
1429
|
|
1430
|
class Subtitle(AtomText):
    """AtomText element with tag name "subtitle"."""
    def __init__(self, text=""):
        AtomText.__init__(self, "subtitle")
        self.text = text
|
1434
|
|
1435
|
class Content(AtomText):
    """AtomText element with tag name "content"."""
    def __init__(self, text=""):
        AtomText.__init__(self, "content")
        self.text = text
|
1439
|
|
1440
|
class Summary(AtomText):
    """AtomText element with tag name "summary"."""
    def __init__(self, text=""):
        AtomText.__init__(self, "summary")
        self.text = text
|
1444
|
|
1445
|
class Rights(AtomText):
    """AtomText element with tag name "rights"."""
    def __init__(self, text=""):
        AtomText.__init__(self, "rights")
        self.text = text
|
1449
|
|
1450
|
class Id(TextElement):
    """TextElement with tag name "id" and no named attributes."""
    def __init__(self, text=""):
        TextElement.__init__(self, "id", None, None)
        self.text = text
|
1454
|
|
1455
|
class Generator(TextElement):
    """TextElement with tag name "generator"; attrs "uri" and "version"."""
    def __init__(self):
        TextElement.__init__(self, "generator", None, None,
                ["uri", "version"])
|
1459
|
|
1460
|
class Category(TextElement):
    """
    TextElement with tag name "category".

    term_val is handed to TextElement.__init__() together with the
    attribute name s_term; "scheme" and "label" are the other named attrs.
    """
    def __init__(self, term_val=""):
        TextElement.__init__(self, "category", s_term, term_val,
                [s_term, "scheme", "label"])
|
1464
|
|
1465
|
class Link(TextElement):
    """
    TextElement with tag name "link".

    href_val is handed to TextElement.__init__() together with the
    attribute name s_href.
    """
    def __init__(self, href_val=""):
        TextElement.__init__(
            self, "link", s_href, href_val,
            [s_href, "rel", "type", "hreflang", "title", "length", s_lang])
|
1470
|
|
1471
|
class Icon(TextElement):
    """TextElement with tag name "icon" and no named attributes."""
    def __init__(self):
        TextElement.__init__(self, "icon", None, None)
|
1474
|
|
1475
|
class Logo(TextElement):
    """TextElement with tag name "logo" and no named attributes."""
    def __init__(self):
        TextElement.__init__(self, "logo", None, None)
|
1478
|
|
1479
|
class Name(TextElement):
    """TextElement with tag name "name" and no named attributes."""
    def __init__(self, text=""):
        TextElement.__init__(self, "name", None, None)
        self.text = text
|
1483
|
|
1484
|
class Email(TextElement):
    """TextElement with tag name "email" and no named attributes."""
    def __init__(self):
        TextElement.__init__(self, "email", None, None)
|
1487
|
|
1488
|
class Uri(TextElement):
    """TextElement with tag name "uri" and no named attributes."""
    def __init__(self):
        TextElement.__init__(self, "uri", None, None)
|
1491
|
|
1492
|
|
1493
|
|
1494
|
class BasicAuthor(NestElement):
    """
    Shared base for person-like elements: a NestElement holding a Name,
    an Email and a Uri.

    tag_name is the tag to use; name is the initial text of the Name.
    """
    def __init__(self, tag_name, name):
        NestElement.__init__(self, tag_name, None, None)
        # NOTE(review): assignment order presumably fixes the order of
        # the nested elements in the output -- keep it as is.
        self.name = Name(name)
        self.email = Email()
        self.uri = Uri()
|
1500
|
|
1501
|
class Author(BasicAuthor):
    """BasicAuthor with tag name "author"."""
    def __init__(self, name=""):
        BasicAuthor.__init__(self, tag_name="author", name=name)
|
1504
|
|
1505
|
class Contributor(BasicAuthor):
    """BasicAuthor with tag name "contributor"."""
    def __init__(self, name=""):
        BasicAuthor.__init__(self, tag_name="contributor", name=name)
|
1508
|
|
1509
|
|
1510
|
|
1511
|
class Updated(Timestamp):
    """Timestamp with tag name "updated"."""
    def __init__(self, time=0.0):
        Timestamp.__init__(self, tag_name="updated", time=time)
|
1514
|
|
1515
|
class Published(Timestamp):
    """Timestamp with tag name "published"."""
    def __init__(self, time=0.0):
        Timestamp.__init__(self, tag_name="published", time=time)
|
1518
|
|
1519
|
|
1520
|
|
1521
|
class FeedElement(NestElement):
    """
    Shared base for feed-like elements (see Feed and Source).

    Creates, empty, every child item and collection listed below.
    """
    def __init__(self, tag_name):
        NestElement.__init__(self, tag_name, None, None)

        # NOTE(review): assignment order presumably fixes the order of
        # the children in the output -- keep it as is.
        self.title = Title("")
        self.id = Id("")
        self.updated = Updated()
        self.authors = Collection(Author)
        self.links = Collection(Link)

        self.subtitle = Subtitle("")
        self.categories = Collection(Category)
        self.contributors = Collection(Contributor)
        self.generator = Generator()
        self.icon = Icon()
        self.logo = Logo()
        self.rights = Rights("")
|
1538
|
|
1539
|
class Feed(FeedElement):
    """
    FeedElement with tag name "feed": sets the Atom xmlns attribute,
    placeholder title and id text, and adds a Collection of Entry.
    """
    def __init__(self):
        FeedElement.__init__(self, "feed")
        self.attrs["xmlns"] = "http://www.w3.org/2005/Atom"
        # Placeholder text, meant to be overwritten by the caller.
        self.title.text = "Title of Feed Goes Here"
        self.id.text = "ID of Feed Goes Here"
        # Added last, after everything FeedElement created.
        self.entries = Collection(Entry)
|
1546
|
|
1547
|
class Source(FeedElement):
    """FeedElement with tag name "source"."""
    def __init__(self):
        FeedElement.__init__(self, tag_name="source")
|
1550
|
|
1551
|
|
1552
|
|
1553
|
class Entry(NestElement):
    """
    NestElement with tag name "entry".

    Starts with placeholder title and id text; every other child item
    and collection starts out empty.
    """
    def __init__(self):
        NestElement.__init__(self, "entry", None, None)
        # NOTE(review): assignment order presumably fixes the order of
        # the children in the output -- keep it as is.
        self.title = Title("Title of Entry Goes Here")
        self.id = Id("ID of Entry Goes Here")
        self.updated = Updated()
        self.authors = Collection(Author)
        self.links = Collection(Link)

        self.content = Content("")
        self.summary = Summary("")
        self.categories = Collection(Category)
        self.contributors = Collection(Contributor)
        self.published = Published()
        self.source = Source()
        self.rights = Rights("")
|
1569
|
|
1570
|
|
1571
|
|
1572
|
def diff(s0, name0, s1, name1):
    """
    Return a line-by-line difflib.ndiff() report of s0 against s1.

    Both strings are split on "\\n" and the report lines are joined with
    "\\n".  name0 and name1 are accepted but not used.
    """
    import difflib
    delta = difflib.ndiff(s0.split("\n"), s1.split("\n"))
    return "\n".join(delta)
|
1578
|
|
1579
|
|
1580
|
def run_test_cases():
|
1581
|
|
1582
|
# The default is to make time stamps in local time with appropriate
|
1583
|
# offset; for our tests, we want a default "Z" offset instead.
|
1584
|
set_default_time_offset("Z")
|
1585
|
|
1586
|
failed_tests = 0
|
1587
|
|
1588
|
|
1589
|
# Test: convert current time into a timestamp string and back
|
1590
|
|
1591
|
now = local_time()
|
1592
|
# timestamp format does not allow fractional seconds
|
1593
|
now = float(int(now)) # truncate any fractional seconds
|
1594
|
s = s_timestamp(now)
|
1595
|
t = utc_time_from_s_timestamp(s)
|
1596
|
if now != t:
|
1597
|
failed_tests += 1
|
1598
|
print "test case failed:"
|
1599
|
print now, "-- original timestamp"
|
1600
|
print t, "-- converted timestamp does not match"
|
1601
|
|
1602
|
|
1603
|
# Test: convert a timestamp string to a time value and back
|
1604
|
|
1605
|
s_time = "2003-12-13T18:30:02Z"
|
1606
|
t = utc_time_from_s_timestamp(s_time)
|
1607
|
s = s_timestamp(t)
|
1608
|
if s_time != s:
|
1609
|
failed_tests += 1
|
1610
|
print "test case failed:"
|
1611
|
print s_time, "-- original timestamp"
|
1612
|
print s, "-- converted timestamp does not match"
|
1613
|
|
1614
|
|
1615
|
# Test: generate the "Atom-Powered Robots Run Amok" example
|
1616
|
#
|
1617
|
# Note: the original had some of the XML declarations in
|
1618
|
# a different order than PyAtom puts them. I swapped around
|
1619
|
# the lines here so they would match the PyAtom order. Other
|
1620
|
# than that, this is the example from:
|
1621
|
#
|
1622
|
# http://www.atomenabled.org/developers/syndication/#sampleFeed
|
1623
|
|
1624
|
s_example = """\
|
1625
|
<?xml version="1.0" encoding="utf-8"?>
|
1626
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
1627
|
<title>Example Feed</title>
|
1628
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
1629
|
<updated>2003-12-13T18:30:02Z</updated>
|
1630
|
<author>
|
1631
|
<name>John Doe</name>
|
1632
|
</author>
|
1633
|
<link href="http://example.org/"/>
|
1634
|
<entry>
|
1635
|
<title>Atom-Powered Robots Run Amok</title>
|
1636
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
1637
|
<updated>2003-12-13T18:30:02Z</updated>
|
1638
|
<link href="http://example.org/2003/12/13/atom03"/>
|
1639
|
<summary>Some text.</summary>
|
1640
|
</entry>
|
1641
|
</feed>"""
|
1642
|
|
1643
|
xmldoc = XMLDoc()
|
1644
|
|
1645
|
feed = Feed()
|
1646
|
xmldoc.root_element = feed
|
1647
|
|
1648
|
feed.title = "Example Feed"
|
1649
|
feed.id = "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6"
|
1650
|
feed.updated = "2003-12-13T18:30:02Z"
|
1651
|
|
1652
|
link = Link("http://example.org/")
|
1653
|
feed.links.append(link)
|
1654
|
|
1655
|
author = Author("John Doe")
|
1656
|
feed.authors.append(author)
|
1657
|
|
1658
|
|
1659
|
entry = Entry()
|
1660
|
feed.entries.append(entry)
|
1661
|
entry.id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
|
1662
|
entry.title = "Atom-Powered Robots Run Amok"
|
1663
|
entry.updated = "2003-12-13T18:30:02Z"
|
1664
|
entry.summary = "Some text."
|
1665
|
|
1666
|
link = Link("http://example.org/2003/12/13/atom03")
|
1667
|
entry.links.append(link)
|
1668
|
|
1669
|
|
1670
|
s = str(xmldoc)
|
1671
|
if s_example != s:
|
1672
|
failed_tests += 1
|
1673
|
print "test case failed:"
|
1674
|
print "The generated XML doesn't match the example. diff follows:"
|
1675
|
print diff(s_example, "s_example", s, "s")
|
1676
|
|
1677
|
|
1678
|
# Test: verify that xmldoc.Validate() succeeds
|
1679
|
|
1680
|
if not xmldoc.Validate():
|
1681
|
failed_tests += 1
|
1682
|
print "test case failed:"
|
1683
|
print "xmldoc.Validate() failed."
|
1684
|
|
1685
|
|
1686
|
# Test: does Element work both nested and non-nested?
|
1687
|
s_test = """\
|
1688
|
<test>
|
1689
|
<test:agent number="007">James Bond</test:agent>
|
1690
|
<test:pet
|
1691
|
nickname="Mei-Mei"
|
1692
|
type="cat">Matrix</test:pet>
|
1693
|
</test>"""
|
1694
|
|
1695
|
class TestPet(Element):
|
1696
|
def __init__(self, name=""):
|
1697
|
Element.__init__(self, "test:pet", None, None)
|
1698
|
self.text = name
|
1699
|
|
1700
|
class TestAgent(Element):
|
1701
|
def __init__(self, name=""):
|
1702
|
Element.__init__(self, "test:agent", None, None)
|
1703
|
self.text = name
|
1704
|
|
1705
|
class Test(Element):
|
1706
|
def __init__(self):
|
1707
|
Element.__init__(self, "test", None, None)
|
1708
|
self.test_agent = TestAgent()
|
1709
|
self.test_pet = TestPet()
|
1710
|
|
1711
|
test = Test()
|
1712
|
test.test_agent = "James Bond"
|
1713
|
test.test_agent.attrs["number"] = "007"
|
1714
|
test.test_pet = "Matrix"
|
1715
|
test.test_pet.attrs["type"] = "cat"
|
1716
|
test.test_pet.attrs["nickname"] = "Mei-Mei"
|
1717
|
|
1718
|
s = str(test)
|
1719
|
if s_test != s:
|
1720
|
failed_tests += 1
|
1721
|
print "test case failed:"
|
1722
|
print "test output doesn't match. diff follows:"
|
1723
|
print diff(s_test, "s_test", s, "s")
|
1724
|
|
1725
|
|
1726
|
if failed_tests > 0:
|
1727
|
print "self-test failed!"
|
1728
|
else:
|
1729
|
print "self-test successful."
|
1730
|
|
1731
|
|
1732
|
|
1733
|
# Running this file as a script (rather than importing it) runs the
# built-in self-test.
if __name__ == "__main__":
    run_test_cases()
|