Coverage for xattrfile/__init__.py: 80%
252 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-31 02:45 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-31 02:45 +0000
1# -*- coding: utf-8 -*-
3import shutil
4import logging
5import os
6import six
7import redis
8import hashlib
9import codecs
# PDOC3 configuration: every name listed below is mapped to False in the
# ``__pdoc__`` override dictionary.
__pdoc__ = dict.fromkeys(
    (
        "set_tag",
        "get_tag",
        "print_tags",
        "unittests_get_redis_callable",
        "metwork_get_redis_callable",
    ),
    False,
)

#: Logger definition
DEFAULT_LOGGER = logging.getLogger("xattrfile")

#: Redis instance to manage extended attributes
#: (created lazily by metwork_get_redis_callable())
RED = None

#: Mock redis instance (created lazily by unittests_get_redis_callable())
UNITTESTS_RED = None

#: MFMODULE_RUNTIME_HOME value (None when the variable is not set)
MFMODULE_RUNTIME_HOME = os.environ.get('MFMODULE_RUNTIME_HOME')
def unittests_get_redis_callable():
    """Create or return the shared mock redis client.

    The client is built with ``mockredis.mock_redis_client()`` on the first
    call and cached in the module-level ``UNITTESTS_RED`` variable, so every
    later call returns the very same object.

    Returns:
        the cached mock redis client.

    """
    global UNITTESTS_RED
    # mockredis is imported at call time (not at module import time)
    from mockredis import mock_redis_client
    if UNITTESTS_RED is not None:
        return UNITTESTS_RED
    UNITTESTS_RED = mock_redis_client()
    return UNITTESTS_RED
def metwork_get_redis_callable():
    """Create or return a redis instance in a metwork module context.

    The connection is created only once and cached in the module-level
    ``RED`` variable.

    When MFMODULE_RUNTIME_HOME is set, the connection uses the unix socket
    ${MFMODULE_RUNTIME_HOME}/var/redis.socket. When it is not set, the object
    returned by unittests_get_redis_callable() is used instead.

    Returns:
        redis connection (redis.Redis object).

    """
    global RED
    # Fast path: the connection is already initialized
    if RED is not None:
        return RED
    if MFMODULE_RUNTIME_HOME is None:
        RED = unittests_get_redis_callable()
        return RED
    RED = redis.Redis(
        unix_socket_path=os.path.join(MFMODULE_RUNTIME_HOME, "var",
                                      "redis.socket"))
    return RED
class DictWithDirtyFlag(dict):
    """Dictionary with modification (dirty) flag.

    This class overrides dict class. When this dict is modified (item
    assignment or deletion, update(), pop(), popitem(), clear(),
    setdefault()), the dirty flag is set to True. Of course, you can reset
    the dirty flag manually. It is just a flag. You have to implement what
    you want with it.

    Note: items given to the constructor do not set the dirty flag.

    Example:

    ```python
    >>> d = DictWithDirtyFlag()
    >>> d.dirty
    False
    >>> d['foo'] = 'bar'
    >>> d.dirty
    True
    >>> # do what you want...
    >>> d.dirty = False
    >>> d['foo2'] = 'bar2'
    >>> d.dirty
    True
    ```

    Attributes:
        dirty (boolean): the dirty flag

    """

    dirty = False

    def __setitem__(self, item, value):
        self.dirty = True
        return super(DictWithDirtyFlag, self).__setitem__(item, value)

    def __delitem__(self, item):
        self.dirty = True
        return super(DictWithDirtyFlag, self).__delitem__(item)

    def update(self, *args, **kwargs):
        """Update the dict like dict.update() and set the dirty flag.

        Items are assigned one by one through __setitem__ so that the dirty
        flag (and any conversion done by a subclass) is applied.
        """
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def setdefault(self, key, default=None):
        """Like dict.setdefault(); dirty is set only if the key is added."""
        if key not in self:
            self[key] = default
        return self[key]

    def pop(self, key, *default):
        """Like dict.pop(); dirty is set only if a key is really removed."""
        if len(default) > 1:
            raise TypeError("pop expected at most 2 arguments, got %d" %
                            (len(default) + 1))
        if key in self:
            value = self[key]
            del self[key]
            return value
        if default:
            return default[0]
        raise KeyError(key)

    def popitem(self):
        """Like dict.popitem() (KeyError if empty) and set the dirty flag."""
        item = super(DictWithDirtyFlag, self).popitem()
        self.dirty = True
        return item

    def clear(self):
        """Remove all items; dirty is set only if there was something."""
        if len(self) > 0:
            self.dirty = True
        super(DictWithDirtyFlag, self).clear()
class BytesDictWithDirtyFlag(DictWithDirtyFlag):
    """Dictionary with modification (dirty) flag for bytes keys/values.

    This class overrides DictWithDirtyFlag class. It adds checks and
    conversions to be sure that both keys and values are bytes strings:
    text (unicode) strings are encoded to utf8, bytes strings are kept as
    they are, and any other type raises an Exception.

    The conversion is applied everywhere items come in: constructor,
    item assignment, update(), setdefault(), and on keys for lookups
    (``[]``, ``in``, get(), pop(), ``del``).

    Note: items given to the constructor do not set the dirty flag.

    Example (in python3):

    ```python
    >>> d = BytesDictWithDirtyFlag()
    >>> d['foo'] = 'bar'
    >>> d.dirty
    True
    >>> d['foo']
    b'bar'
    >>> d[b'foo']
    b'bar'
    ```

    """

    def __init__(self, *args, **kwargs):
        """Build the dict from the same arguments as dict().

        Initial keys and values are converted to bytes. The dirty flag is
        left untouched (False on a fresh object).
        """
        super(BytesDictWithDirtyFlag, self).__init__()
        for key, value in dict(*args, **kwargs).items():
            # plain dict.__setitem__ => conversion without setting "dirty"
            dict.__setitem__(self,
                             self.__convert_key_to_bytes(key),
                             self.__convert_key_to_bytes(value))

    def __setitem__(self, key, value):
        new_key = self.__convert_key_to_bytes(key)
        new_value = self.__convert_key_to_bytes(value)
        return super(BytesDictWithDirtyFlag, self).__setitem__(new_key,
                                                               new_value)

    def __delitem__(self, key):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).__delitem__(new_key)

    def __getitem__(self, key):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).__getitem__(new_key)

    def __contains__(self, key):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).__contains__(new_key)

    def get(self, key, default=None):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).get(new_key, default)

    def update(self, *args, **kwargs):
        """Like dict.update() but with bytes conversion and dirty flag."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def setdefault(self, key, default=None):
        """Like dict.setdefault() but with bytes conversion.

        The default value is converted (and so must be a string) only when
        the key is missing; the dirty flag is set only in that case.
        """
        new_key = self.__convert_key_to_bytes(key)
        if not dict.__contains__(self, new_key):
            self[new_key] = default
        return dict.__getitem__(self, new_key)

    def pop(self, key, *default):
        """Like dict.pop() but with key conversion.

        The dirty flag is set only if a key is really removed.
        """
        new_key = self.__convert_key_to_bytes(key)
        was_present = dict.__contains__(self, new_key)
        value = super(BytesDictWithDirtyFlag, self).pop(new_key, *default)
        if was_present:
            self.dirty = True
        return value

    def __convert_key_to_bytes(self, strg):
        """Convert a key (or a value) to a bytes string.

        Args:
            strg: text string (encoded to utf8) or bytes string (returned
                unchanged).

        Returns:
            the corresponding bytes string.

        Raises:
            Exception: if strg is neither a text nor a bytes string, or
                if it can't be encoded to utf8.

        """
        if isinstance(strg, six.text_type):
            try:
                return strg.encode('utf8')
            except UnicodeEncodeError:
                raise Exception("can't encode to utf8 unicode value: %s" %
                                strg)
        elif isinstance(strg, six.binary_type):
            return strg
        else:
            raise Exception("can't use %s as key or value in a "
                            "BytesDictWithDirtyFlag dict" % type(strg))
class XattrFile(object):
    """File with attributes.

    At the beginning, this class was a wrapper around files with POSIX
    extended attributes (xattr). But because of xattr limitations, we store
    attributes into a redis instance. The name of this class should be
    changed one day.

    Attributes are stored inside the object. They are lazy loaded from redis.

    You can access them through tags attributes as a `BytesDictWithDirtyFlag`
    dict. If you made some modifications on these tags, you can force
    a redis write with commit() method. But main public methods on the
    object do it for you. And there is an automatic destructor to do that
    if necessary.

    You should not manipulate corresponding filepath directly (readonly access
    is ok) to avoid incoherences. Please use public methods on the file
    to copy/delete/rename/... it.

    """

    def __init__(self, filepath,
                 get_redis_callable=metwork_get_redis_callable,
                 redis_timeout=86400):
        """Constructor.

        Args:
            filepath (string): full file path.
            get_redis_callable (callable): a function called with no arg which
                has to return a connected redis.Redis object to a redis
                instance to read/store attributes.
            redis_timeout (int): lifetime (in seconds) of redis keys (-1: means
                no timeout) => it means that you will lose attributes on a
                given file after this timeout (if no modification).

        Raises:
            IOError: if the given path does not exist or is not a file.

        """
        # Private attributes are set first so that __del__ can run safely
        # even if the checks below raise.
        self.__logger = None
        self.__filepath = None
        self.__redis_key_cache = None
        self.__tags = None
        self.get_redis_callable = get_redis_callable
        """function to get a connected redispy instance."""
        self.redis_timeout = redis_timeout
        """lifetime (in seconds) of redis keys (-1 => no timeout), it means
        that you will lose attributes on a given file after this timeout
        (if no modification)."""
        if not os.path.exists(filepath):
            raise IOError(2, 'No such file or directory', filepath)
        if not os.path.isfile(filepath):
            raise IOError(2, 'Is not a file', filepath)
        self.__set_filepath(filepath)

    def __del__(self):
        """Destructor: commit dirty tags or drop tags of a vanished file."""
        if self.__filepath:
            if not os.path.exists(self.__filepath):
                # The file disappeared behind our back: drop its redis hash
                if self.get_redis_callable().delete(self._redis_key) > 0:
                    self.logger.warning("%s path does not exist anymore => "
                                        "we removed corresponding attributes "
                                        "in redis", self.__filepath)
                return
            else:
                self.commit()

    @property
    def logger(self):
        """a configured logger object (lazy property)."""
        if self.__logger is None:
            self.__logger = DEFAULT_LOGGER
        return self.__logger

    def commit(self):
        """Write tags into redis (if they are dirty).

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        self._write_tags()

    @property
    def tags(self):
        """BytesDictWithDirtyFlag object (can be used as a dict of bytes)
        containing all tags of the current file."""
        if self.__tags is None:
            self._read_tags()
        return self.__tags

    def __set_tags(self, new_tags):
        """Set tags by copying a dict of new tags (and mark them dirty)."""
        self.__tags = BytesDictWithDirtyFlag({x: y
                                              for x, y in new_tags.items()})
        self.__tags.dirty = True

    def clear_tags(self):
        """Remove all tags of the file (in the object and in redis).

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        self.__set_tags({})
        self._write_tags()

    @property
    def _redis_key(self):
        """Get the redis key to store attributes as hash.

        This value depends only on full filepath and it is cached into
        __redis_key_cache attribute.

        """
        if not self.__redis_key_cache:
            self.__redis_key_cache = "xattr_%s" % \
                hashlib.md5(self.filepath.encode('utf-8')).hexdigest()
        return self.__redis_key_cache

    @property
    def filepath(self):
        """The full (absolute) filepath (string) of the file."""
        return self.__filepath

    def __set_filepath(self, filepath):
        """Set/Change filepath and reset __redis_key_cache attribute."""
        self.__filepath = os.path.abspath(filepath)
        self.__redis_key_cache = None

    def _write_tags(self, force=False):
        """Write tags in redis.

        Args:
            force (boolean): if True, tags are written to redis even if they
                are not "dirty".

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        if not (self.tags.dirty or force):
            return
        pipe = self.get_redis_callable().pipeline()
        # Clear the Redis hash to make sure no old tags are present
        # The hash is then recreated from scratch
        pipe.delete(self._redis_key)
        for (key, value) in self.tags.items():
            pipe.hset(self._redis_key, key, value)
        if self.redis_timeout != -1:
            pipe.expire(self._redis_key, self.redis_timeout)
        pipe.execute()
        self.tags.dirty = False

    def _read_tags(self):
        """Read tags from redis and overwrite __tags variable.

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        r = self.get_redis_callable()
        self.__tags = BytesDictWithDirtyFlag(r.hgetall(self._redis_key))

    def __move_redis_hash(self, src_key, dst_key):
        """Rename a redis hash and re-apply the timeout on the new key."""
        r = self.get_redis_callable()
        if self.redis_timeout != -1:
            pipe = r.pipeline()
            pipe.rename(src_key, dst_key)
            pipe.expire(dst_key, self.redis_timeout)
            pipe.execute()
        else:
            r.rename(src_key, dst_key)

    def copy_tags_on(self, filepath):
        """Copy current tags to another file and returns corresponding XattrFile.

        The destination filepath must exist. If not, use copy() method.

        Note: tags are committed to redis before the copy. The returned
        object uses the same redis callable and the same redis timeout
        as the current one.

        Args:
            filepath (string): complete filepath to copy tags on.

        Returns:
            Xattrfile corresponding to given filepath
                with current tags copied on.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: if the given path does not exist or is not a file.

        """
        self._write_tags()
        new_xaf = XattrFile(filepath,
                            get_redis_callable=self.get_redis_callable,
                            redis_timeout=self.redis_timeout)
        new_xaf.__set_tags(self.tags)
        new_xaf._write_tags()
        return new_xaf

    def copy(self, new_filepath, tmp_suffix=".t", chmod_mode_int=None):
        """Copy of the file (and its tags) with temporary suffix.

        The temporary suffix is used during the copy to get a kind of atomic
        operation.

        Note: tags are committed to redis during the copy.

        Args:
            new_filepath (string): filepath to copy on.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).
            chmod_mode_int (integer): if set, chmod mode as integer
                (not octal !) (int('0755', 8) for example to get the integer
                value of well known '0755' octal value).

        Returns:
            a new Xattrfile corresponding to the copied file.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the copy.

        """
        tmp_filepath = new_filepath
        if tmp_suffix is not None:
            tmp_filepath = tmp_filepath + tmp_suffix
        shutil.copy2(self.filepath, tmp_filepath)
        self.logger.debug("%s copied to %s", self.filepath, tmp_filepath)
        xattr_f = self.copy_tags_on(tmp_filepath)
        if chmod_mode_int is not None:
            xattr_f.chmod(chmod_mode_int)
        if tmp_suffix is not None:
            # final step: move the temporary file to its real name
            xattr_f.rename(new_filepath)
        return xattr_f

    def rename(self, new_filepath):
        """Move file (and its tags) to another path (in the same filesystem).

        Tags are preserved and written before the operation. If the rename
        fails at the filesystem level, redis changes are rolled back and the
        object keeps its old filepath.

        Args:
            new_filepath (string): new filepath.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the rename at a filesystem level.

        """
        self._write_tags()
        old_hash_md5 = self._redis_key
        old_filepath = self.filepath
        self.__set_filepath(new_filepath)
        # Rename the xattr Redis hash only if it exists
        # (If a file has no xattr, there is no Redis entry corresponding
        # to that file)
        if self.tags:
            self.__move_redis_hash(old_hash_md5, self._redis_key)
        try:
            os.rename(old_filepath, new_filepath)
        except Exception:
            # we have to rollback all changes done
            # this is necessary to move_or_copy operations
            try:
                if self.tags:
                    self.__move_redis_hash(self._redis_key, old_hash_md5)
            finally:
                # restore the old filepath even if the redis rollback fails
                self.__set_filepath(old_filepath)
            raise
        self.logger.debug("%s moved to %s", old_filepath, new_filepath)

    def delete(self):
        """Delete the file and corresponding tags.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the delete at a filesystem level.

        """
        os.unlink(self.filepath)
        self.clear_tags()
        self.logger.debug("%s deleted", self.filepath)

    def basename(self):
        """Get and return the basename of the file."""
        return os.path.basename(self.filepath)

    def dirname(self):
        """Get and return the dirname of the file."""
        return os.path.dirname(self.filepath)

    def getsize(self):
        """Return the size of the file (in bytes).

        Returns:
            int: the size of the file (in bytes) or None in case of problems.

        """
        try:
            return os.path.getsize(self.filepath)
        except Exception as e:
            self.logger.warning("can't get the size of %s with exception: %s",
                                self.filepath, e)
        return None

    def getuid(self):
        """Return the uid of the file.

        Returns:
            int: the uid of the file or None in case of problems.

        """
        try:
            return os.stat(self.filepath).st_uid
        except Exception as e:
            self.logger.warning("can't get the uid of %s with exception: %s",
                                self.filepath, e)
        return None

    def hard_link(self, new_filepath, tmp_suffix=".t"):
        """Create a hard link of the file (and its tags).

        The temporary suffix is used during the hardlink to get a kind of
        atomic operation.

        Note: tags are committed to redis during the hard link.

        Args:
            new_filepath (string): filepath for the hard link.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).

        Returns:
            a new Xattrfile corresponding to the new file/link.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the link at a filesystem level.

        """
        tmp_filepath = new_filepath
        if tmp_suffix is not None:
            tmp_filepath = tmp_filepath + tmp_suffix
        os.link(self.filepath, tmp_filepath)
        self.logger.debug("%s hardlinked to %s", self.filepath, tmp_filepath)
        xattr_f = self.copy_tags_on(tmp_filepath)
        if tmp_suffix is not None:
            xattr_f.rename(new_filepath)
        return xattr_f

    def chmod(self, mode_int):
        """Change the mode of the file to the provided numeric mode.

        Args:
            mode_int (integer): mode as integer (not octal !) (int('0755', 8)
                for example to get the integer value of well known '0755' octal
                value).

        Raises:
            IOError: can't do the chmod at a filesystem level.

        """
        os.chmod(self.filepath, mode_int)
        self.logger.debug("chmod %s changed to %i mode (integer)",
                          self.filepath, mode_int)

    def dump_tags_on_logger(self, logger, lvl):
        """Dump tags on the given logger with the given log level."""
        logger.log(lvl, "***** BEGIN DUMP TAGS FOR FILE %s *****",
                   self.filepath)
        for k in sorted(self.tags.keys()):
            v = self.tags[k]
            logger.log(lvl, "%s = %s", k.decode('utf8'), v.decode('utf8'))
        logger.log(lvl, "***** END DUMP TAGS FOR FILE %s *****", self.filepath)

    def write_tags_in_a_file(self, filepath):
        """Write tags in a utf8 file.

        One "key = value" line is written per tag, sorted by key.

        Args:
            filepath: filepath of the file to write in.

        """
        with codecs.open(filepath, "w", "utf8") as f:
            for key in sorted(self.tags.keys()):
                value = self.tags[key]
                f.write(key.decode('utf8'))
                f.write(" = ")
                f.write(value.decode('utf8'))
                f.write("\n")

    def _hardlink_move_or_copy(self, new_filepath, hardlink_mode=True,
                               tmp_suffix=".t", chmod_mode_int=None):
        """Hardlink/move the file, falling back on a copy, without exception.

        Args:
            new_filepath (string): complete destination filepath.
            hardlink_mode (boolean): if True, try a hard link first (the
                original file is kept); if False, try a rename first (the
                original file is removed and the current object follows
                the file to its new path).
            tmp_suffix (string): temporary suffix during hardlink/copy
                (None means no temporary suffix).
            chmod_mode_int (integer): if set, the hardlink/rename attempt
                is skipped and a copy with this mode is done directly.

        Returns:
            (boolean, boolean): first boolean is True if the operation was
                ok; second boolean is True if it was done with a
                hardlink/move, False if it was done with a copy.

        """
        old_filepath = self.filepath
        if chmod_mode_int is None:
            try:
                if hardlink_mode:
                    self.hard_link(new_filepath, tmp_suffix=tmp_suffix)
                else:
                    self.rename(new_filepath)
                return (True, True)
            except Exception as e:
                # best effort: we fall back on a plain copy below
                self.logger.debug("can't %s %s into %s (%s) => "
                                  "falling back on copy",
                                  "hardlink" if hardlink_mode else "move",
                                  old_filepath, new_filepath, e)
        try:
            new_xaf = self.copy(new_filepath, tmp_suffix=tmp_suffix,
                                chmod_mode_int=chmod_mode_int)
            if not hardlink_mode:
                # "move" done by copy: remove the original file and its tags
                # (through the same redis instance as the current object)
                old_xaf = XattrFile(
                    old_filepath,
                    get_redis_callable=self.get_redis_callable,
                    redis_timeout=self.redis_timeout)
                if not old_xaf.delete_or_nothing():
                    self.logger.warning("can't delete %s", old_filepath)
                    return (False, False)
                # the current object now designates the new file (its tags
                # were copied unchanged, so the cached ones are still valid)
                self.__set_filepath(new_xaf.filepath)
            return (True, False)
        except Exception:
            if hardlink_mode:
                self.logger.warning("can't hardlink/copy %s into %s",
                                    old_filepath,
                                    new_filepath)
            else:
                self.logger.warning("can't move/copy %s into %s", old_filepath,
                                    new_filepath)
        return (False, False)

    def move_or_copy(self, new_filepath, tmp_suffix=".t", chmod_mode_int=None):
        """Move or copy (only if move failed) without any exceptions.

        The original file (and its tags) is deleted (whatever move or copy
        is effectively done) and the current object is renamed to new filepath.

        Args:
            new_filepath (string): complete new filepath towards move/copy.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).
            chmod_mode_int (integer): DEPRECATED (do not use).

        Returns:
            (boolean, boolean): first boolean is True if the operation was ok,
                False else ; second boolean is True if the operation was done
                with a move, False if the operation was done with a copy.

        """
        return self._hardlink_move_or_copy(new_filepath, tmp_suffix=tmp_suffix,
                                           chmod_mode_int=chmod_mode_int,
                                           hardlink_mode=False)

    def hardlink_or_copy(self, new_filepath, tmp_suffix=".t",
                         chmod_mode_int=None):
        """Hardlink or copy (only if hardlink failed) without raising exceptions.

        The original file (and its tags) is kept intact and the current
        object is not modified.

        Args:
            new_filepath (string): complete new filepath towards hardlink/copy.
            tmp_suffix (string): temporary suffix during hardlink/copy
                (None means no temporary suffix).
            chmod_mode_int (integer): DEPRECATED (do not use).

        Returns:
            (boolean, boolean): first boolean is True if the operation was ok,
                False else ; second boolean is True if the operation was done
                with a hardlink, False if the operation was done with a copy.

        """
        return self._hardlink_move_or_copy(
            new_filepath, tmp_suffix=tmp_suffix,
            chmod_mode_int=chmod_mode_int,
            hardlink_mode=True)

    def delete_or_nothing(self):
        """Delete the file and corresponding tags.

        In case of errors, in contrast to delete() method, no exception
        is raised.

        Returns:
            boolean: True if the delete was ok, False else.

        """
        try:
            self.delete()
            return True
        except Exception:
            pass
        return False

    def copy_or_nothing(self, new_filepath, tmp_suffix=".t",
                        chmod_mode_int=None):
        """Copy a file without raising exceptions.

        In case of errors, in contrast to copy() method, no exception
        is raised.

        Args:
            new_filepath (string): filepath to copy on.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).
            chmod_mode_int (integer): if set, chmod mode as integer
                (not octal !) (int('0755', 8) for example to get the integer
                value of well known '0755' octal value).

        Returns:
            boolean: True if the copy was ok, False else.

        """
        try:
            self.copy(new_filepath, tmp_suffix=tmp_suffix,
                      chmod_mode_int=chmod_mode_int)
            return True
        except Exception as e:
            self.logger.error("can't copy file to %s: %s", new_filepath, e)
        return False