Coverage for xattrfile/__init__.py: 80%

252 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-31 02:45 +0000

1# -*- coding: utf-8 -*- 

2 

3import shutil 

4import logging 

5import os 

6import six 

7import redis 

8import hashlib 

9import codecs 

10 

# PDOC3 configuration (every name listed here is mapped to False)
__pdoc__ = dict.fromkeys(
    (
        "set_tag",
        "get_tag",
        "print_tags",
        "unittests_get_redis_callable",
        "metwork_get_redis_callable",
    ),
    False,
)

#: Logger definition (logger named "xattrfile")
DEFAULT_LOGGER = logging.getLogger("xattrfile")

#: Redis instance to manage extended attributes
#: (lazily created by metwork_get_redis_callable())
RED = None

#: Mock redis instance (lazily created by unittests_get_redis_callable())
UNITTESTS_RED = None

#: MFMODULE_RUNTIME_HOME value (None when the environment variable is unset)
MFMODULE_RUNTIME_HOME = os.environ.get('MFMODULE_RUNTIME_HOME')

31 

32 

def unittests_get_redis_callable():
    """Return the shared mock redis client, creating it on first call.

    The client is stored in the UNITTESTS_RED module global so that every
    call returns the same object.

    Returns:
        the object returned by mockredis.mock_redis_client().

    """
    global UNITTESTS_RED
    # The import is done at each call (as in the historical implementation),
    # so a missing mockredis package is always reported.
    from mockredis import mock_redis_client
    if UNITTESTS_RED is not None:
        return UNITTESTS_RED
    UNITTESTS_RED = mock_redis_client()
    return UNITTESTS_RED

39 

40 

def metwork_get_redis_callable():
    """Create or return a redis instance in a metwork module context.

    The instance is created only once and kept in the RED module global.

    When MFMODULE_RUNTIME_HOME is set, the instance is a unix socket
    connection on ${MFMODULE_RUNTIME_HOME}/var/redis.socket. When it is not
    set, the object returned by unittests_get_redis_callable() is used
    instead.

    Returns:
        redis connection (redis.Redis object).

    """
    global RED
    # Fast path: the connection is already initialized
    if RED is not None:
        return RED
    if MFMODULE_RUNTIME_HOME is None:
        RED = unittests_get_redis_callable()
        return RED
    redis_socket = os.path.join(MFMODULE_RUNTIME_HOME, "var", "redis.socket")
    RED = redis.Redis(unix_socket_path=redis_socket)
    return RED

61 

62 

class DictWithDirtyFlag(dict):
    """Dictionnary with modification (dirty) flag.

    This class overrides dict class. When this dict is modified, the dirty
    flag is set to True. Of course, you can reset the dirty flag manually.
    It is just a flag. You have to implement what you want with it.

    The flag is raised by item assignment/deletion and by the mutating
    methods update(), setdefault(), pop(), popitem() and clear().
    Building the dict through the constructor does not raise the flag.

    Example:

    ```python
    >>> d = DictWithDirtyFlag()
    >>> d.dirty
    False
    >>> d['foo'] = 'bar'
    >>> d.dirty
    True
    >>> # do what you want...
    >>> d.dirty = False
    >>> d['foo2'] = 'bar2'
    >>> d.dirty
    True
    ```

    Attributes:
        dirty (boolean): the dirty flag

    """

    dirty = False

    def __setitem__(self, item, value):
        self.dirty = True
        return super(DictWithDirtyFlag, self).__setitem__(item, value)

    def __delitem__(self, item):
        self.dirty = True
        return super(DictWithDirtyFlag, self).__delitem__(item)

    def update(self, *args, **kwargs):
        """Same as dict.update() but the dirty flag is set."""
        self.dirty = True
        return super(DictWithDirtyFlag, self).update(*args, **kwargs)

    def setdefault(self, key, default=None):
        """Same as dict.setdefault(); dirty is set only if key is added."""
        if key not in self:
            self.dirty = True
        return super(DictWithDirtyFlag, self).setdefault(key, default)

    def pop(self, key, *args):
        """Same as dict.pop(); dirty is set only if key is removed."""
        if key in self:
            self.dirty = True
        return super(DictWithDirtyFlag, self).pop(key, *args)

    def popitem(self):
        """Same as dict.popitem() but the dirty flag is set."""
        # On an empty dict, KeyError is raised before the flag is touched
        item = super(DictWithDirtyFlag, self).popitem()
        self.dirty = True
        return item

    def clear(self):
        """Same as dict.clear() but the dirty flag is set."""
        self.dirty = True
        return super(DictWithDirtyFlag, self).clear()

100 

101 

class BytesDictWithDirtyFlag(DictWithDirtyFlag):
    """Dictionnary with modification (dirty) flag for bytes keys/values.

    This class overrides DictWithDirtyFlag class. It adds checks and
    conversions to be sure that both keys and values are bytes strings.

    Unicode strings are encoded in utf8, bytes strings are kept as is and
    any other type is refused with an exception. The conversion is applied
    by item access, `in`, get(), update(), setdefault() and pop(). It is
    NOT applied to the content given to the constructor.

    Example (in python3):

    ```python
    >>> d = BytesDictWithDirtyFlag()
    >>> d['foo'] = 'bar'
    >>> d.dirty
    True
    >>> d['foo']
    b'bar'
    >>> d[b'foo']
    b'bar'
    ```

    """

    def __setitem__(self, key, value):
        new_key = self.__convert_key_to_bytes(key)
        new_value = self.__convert_key_to_bytes(value)
        return super(BytesDictWithDirtyFlag, self).__setitem__(new_key,
                                                               new_value)

    def __delitem__(self, key):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).__delitem__(new_key)

    def __getitem__(self, key):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).__getitem__(new_key)

    def __contains__(self, key):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).__contains__(new_key)

    def get(self, key, default=None):
        new_key = self.__convert_key_to_bytes(key)
        return super(BytesDictWithDirtyFlag, self).get(new_key, default)

    def update(self, *args, **kwargs):
        """Same as dict.update() with keys/values converted to bytes."""
        # Every pair goes through __setitem__ to get both the conversion
        # and the dirty flag.
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def setdefault(self, key, default=None):
        """Same as dict.setdefault() with key/default converted to bytes.

        If the key is missing, the default value must be a (unicode or
        bytes) string: an exception is raised otherwise.

        """
        new_key = self.__convert_key_to_bytes(key)
        parent = super(BytesDictWithDirtyFlag, self)
        if not parent.__contains__(new_key):
            self[new_key] = default
        return parent.__getitem__(new_key)

    def pop(self, key, *args):
        """Same as dict.pop() with the key converted to bytes."""
        new_key = self.__convert_key_to_bytes(key)
        parent = super(BytesDictWithDirtyFlag, self)
        if parent.__contains__(new_key):
            # the dict is really going to be modified
            self.dirty = True
        return parent.pop(new_key, *args)

    def __convert_key_to_bytes(self, strg):
        # Despite its name, this helper is used for keys AND values
        if isinstance(strg, six.text_type):
            try:
                return strg.encode('utf8')
            except UnicodeEncodeError:
                raise Exception("can't encode to utf8 unicode value: %s" %
                                strg)
        elif isinstance(strg, six.binary_type):
            return strg
        else:
            raise Exception("can't use %s as key or value in a "
                            "BytesDictWithDirtyFlag dict" % type(strg))

157 

158 

class XattrFile(object):
    """File with attributes.

    At the beginning, this class was a wrapper around files with POSIX
    extended attributes (xattr). But because of xattr limitations, we store
    attributes into a redis instance. The name of this class should be
    changed one day.

    Attributes are stored inside the object. They are lazy loaded from redis.

    You can access them through tags attributes as a `BytesDictWithDirtyFlag`
    dict. If you made some modifications on theses tags, you can force
    a redis write with commit() method. But main public methods on the
    object do it for you. And there is an automatic destructor to do that
    if necessary.

    You should not manipulate corresponding filepath directly (readonly access
    is ok) to avoid incoherences. Please use public methods on the file
    to copy/delete/rename/... it.

    """

    def __init__(self, filepath,
                 get_redis_callable=metwork_get_redis_callable,
                 redis_timeout=86400):
        """Constructor.

        Args:
            filepath (string): full file path.
            get_redis_callable (callable): a function called with no arg which
                has to return a connected redis.Redis object to a redis
                instance to read/store attributes.
            redis_timeout (int): lifetime (in seconds) of redis keys (-1: means
                no timeout) => it means that you will lose attributes on a
                given file after this timeout (if no modification).

        Raises:
            IOError: if the given path does not exist or is not a file.

        """
        # Private attributes are initialized before any check because the
        # destructor reads them even if the constructor raises.
        self.__logger = None
        self.__filepath = None
        self.__redis_key_cache = None
        self.__tags = None
        self.get_redis_callable = get_redis_callable
        """function to get a connected redispy instance."""
        self.redis_timeout = redis_timeout
        """lifetime (in seconds) of redis keys (-1 => no timeout), it means
        that you will lose attributes on a given file after this timeout
        (if no modification)."""
        if not os.path.exists(filepath):
            raise IOError(2, 'No such file or directory', filepath)
        if not os.path.isfile(filepath):
            raise IOError(2, 'Is not a file', filepath)
        self.__set_filepath(filepath)

    def __del__(self):
        # Nothing to do if the constructor failed before setting the path
        if not self.__filepath:
            return
        if os.path.exists(self.__filepath):
            # The file is still here => flush pending tags modifications
            self.commit()
            return
        # The file disappeared => drop orphan attributes from redis
        if self.get_redis_callable().delete(self._redis_key) > 0:
            self.logger.warning("%s path does not exist anymore => "
                                "we removed corresponding attributes "
                                "in redis", self.__filepath)

    @property
    def logger(self):
        """a configured logger object (lazy property)."""
        if self.__logger is None:
            self.__logger = DEFAULT_LOGGER
        return self.__logger

    def commit(self):
        """Write tags into redis (if they are dirty).

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        self._write_tags()

    @property
    def tags(self):
        """BytesDictWithDirtyFlag object (can be used as a dict of bytes)
        containing all tags of the current file."""
        if self.__tags is None:
            self._read_tags()
        return self.__tags

    def __set_tags(self, new_tags):
        """Set tags by copying a dict of new tags."""
        self.__tags = BytesDictWithDirtyFlag({x: y
                                              for x, y in new_tags.items()})
        # The new tags are not in redis yet
        self.__tags.dirty = True

    def clear_tags(self):
        """Remove all tags of the file and write this change into redis.

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        self.__set_tags({})
        self._write_tags()

    @property
    def _redis_key(self):
        """Get the redis key to store attributes as hash.

        This value depends only on full filepath and it is cached into
        __redis_key_cache attribute.

        """
        if not self.__redis_key_cache:
            self.__redis_key_cache = "xattr_%s" % \
                hashlib.md5(self.filepath.encode('utf-8')).hexdigest()
        return self.__redis_key_cache

    @property
    def filepath(self):
        """The full (absolute) filepath (string) of the file."""
        return self.__filepath

    def __set_filepath(self, filepath):
        """Set/Change filepath and reset __redis_key_cache attribute."""
        self.__filepath = os.path.abspath(filepath)
        self.__redis_key_cache = None

    def _write_tags(self, force=False):
        """Write tags in redis.

        Args:
            force (boolean): if True, tags are written to redis even if they
                are not "dirty".

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        # Note: reading self.tags loads them from redis if necessary
        if not (self.tags.dirty or force):
            return
        pipe = self.get_redis_callable().pipeline()
        # Clear the Redis hash to make sure no old tags are present
        # The hash is then recreated from scratch
        pipe.delete(self._redis_key)
        for (key, value) in self.tags.items():
            pipe.hset(self._redis_key, key, value)
        if self.redis_timeout != -1:
            pipe.expire(self._redis_key, self.redis_timeout)
        pipe.execute()
        self.tags.dirty = False

    def _read_tags(self):
        """Read tags from redis and overwrite __tags variable.

        Raises:
            redis.RedisError: if there is a problem with redis.

        """
        r = self.get_redis_callable()
        self.__tags = BytesDictWithDirtyFlag(r.hgetall(self._redis_key))

    def __move_redis_hash(self, src_key, dst_key):
        """Rename a redis hash and (if configured) reset its lifetime."""
        r = self.get_redis_callable()
        if self.redis_timeout != -1:
            pipe = r.pipeline()
            pipe.rename(src_key, dst_key)
            pipe.expire(dst_key, self.redis_timeout)
            pipe.execute()
        else:
            r.rename(src_key, dst_key)

    def copy_tags_on(self, filepath):
        """Copy current tags to another file and returns corresponding XattrFile.

        The destination filepath must exist. If not, use copy() method.

        Note: tags are commited to redis before the copy.

        The returned object uses the same get_redis_callable and
        redis_timeout values than the current one.

        Args:
            filepath (string): complete filepath to copy tags on.

        Returns:
            Xattrfile corresponding to given filepath
                with current tags copied on.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: if the given path does not exist or is not a file.

        """
        self._write_tags()
        new_xaf = XattrFile(filepath,
                            get_redis_callable=self.get_redis_callable,
                            redis_timeout=self.redis_timeout)
        new_xaf.__set_tags(self.tags)
        new_xaf._write_tags()
        return new_xaf

    def copy(self, new_filepath, tmp_suffix=".t", chmod_mode_int=None):
        """Copy of the file (and its tags) with temporary suffix.

        The temporary suffix is used during the copy to get a kind of atomic
        operation.

        Note: tags are commited to redis during the copy.

        Args:
            new_filepath (string): filepath to copy on.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).
            chmod_mode_int (integer): if set, chmod mode as integer
                (not octal !) (int('0755', 8) for example to get the integer
                value of well known '0755' octal value).

        Returns:
            a new Xattrfile corresponding to the copied file.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the copy.

        """
        use_tmp = tmp_suffix is not None
        tmp_filepath = new_filepath + tmp_suffix if use_tmp else new_filepath
        shutil.copy2(self.filepath, tmp_filepath)
        self.logger.debug("%s copied to %s", self.filepath, tmp_filepath)
        xattr_f = self.copy_tags_on(tmp_filepath)
        if chmod_mode_int is not None:
            xattr_f.chmod(chmod_mode_int)
        if use_tmp:
            # Last step: the (complete) temporary file gets its final name
            xattr_f.rename(new_filepath)
        return xattr_f

    def rename(self, new_filepath):
        """Move file (and its tags) to another path (in the same filesystem).

        Tags are preserved and written before the operation.

        If the operation fails, the object keeps its original filepath.

        Args:
            new_filepath (string): new filepath.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the rename at a filesystem level.

        """
        self._write_tags()
        old_hash_md5 = self._redis_key
        old_filepath = self.filepath
        # Rename the xattr Redis hash only if it exists
        # (If a file has no xattr, there is no Redis entry corresponding
        # to that file)
        has_tags = bool(self.tags)
        self.__set_filepath(new_filepath)
        try:
            if has_tags:
                self.__move_redis_hash(old_hash_md5, self._redis_key)
        except Exception:
            # nothing was moved => just restore the original filepath
            self.__set_filepath(old_filepath)
            raise
        try:
            os.rename(old_filepath, new_filepath)
        except Exception:
            # we have to rollback all changes done
            # this is necessary to move_or_copy operations
            try:
                if has_tags:
                    self.__move_redis_hash(self._redis_key, old_hash_md5)
            finally:
                # restored even if the redis rollback fails
                self.__set_filepath(old_filepath)
            raise
        self.logger.debug("%s moved to %s", old_filepath, new_filepath)

    def delete(self):
        """Delete the file and corresponding tags.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the delete at a filesystem level.

        """
        os.unlink(self.filepath)
        self.clear_tags()
        self.logger.debug("%s deleted", self.filepath)

    def basename(self):
        """Get and return the basename of the file."""
        return os.path.basename(self.filepath)

    def dirname(self):
        """Get and return the dirname of the file."""
        return os.path.dirname(self.filepath)

    def getsize(self):
        """Return the size of the file (in bytes).

        Returns:
            int: the size of the file (in bytes) or None in case of problems.

        """
        try:
            return os.path.getsize(self.filepath)
        except Exception as e:
            self.logger.warning("can't get the size of %s with exception: %s",
                                self.filepath, e)
        return None

    def getuid(self):
        """Return the uid of the file.

        Returns:
            int: the uid of the file or None in case of problems.

        """
        try:
            return os.stat(self.filepath).st_uid
        except Exception as e:
            self.logger.warning("can't get the uid of %s with exception: %s",
                                self.filepath, e)
        return None

    def hard_link(self, new_filepath, tmp_suffix=".t"):
        """Create a hard link of the file (and its tags).

        The temporary suffix is used during the hardlink to get a kind of
        atomic operation.

        Note: tags are commited to redis during the hard link.

        Args:
            new_filepath (string): filepath for the hard link.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).

        Returns:
            a new Xattrfile corresponding to the new file/link.

        Raises:
            redis.RedisError: if there is a problem with redis.
            IOError: can't do the link at a filesystem level.

        """
        use_tmp = tmp_suffix is not None
        tmp_filepath = new_filepath + tmp_suffix if use_tmp else new_filepath
        os.link(self.filepath, tmp_filepath)
        self.logger.debug("%s hardlinked to %s", self.filepath, tmp_filepath)
        xattr_f = self.copy_tags_on(tmp_filepath)
        if use_tmp:
            xattr_f.rename(new_filepath)
        return xattr_f

    def chmod(self, mode_int):
        """Change the mode of the file to the provided numeric mode.

        Args:
            mode_int (integer): mode as integer (not octal !) (int('0755', 8)
                for example to get the integer value of well known '0755' octal
                value).

        Raises:
            IOError: can't do the chmod at a filesystem level.

        """
        os.chmod(self.filepath, mode_int)
        self.logger.debug("chmod %s changed to %i mode (integer)",
                          self.filepath, mode_int)

    def dump_tags_on_logger(self, logger, lvl):
        """Dump tags on the given logger with the given log level.

        Tags are dumped (one per log line) sorted by key.

        Args:
            logger: logger object to log on.
            lvl: log level to use.

        """
        logger.log(lvl, "***** BEGIN DUMP TAGS FOR FILE %s *****",
                   self.filepath)
        for k, v in sorted(self.tags.items()):
            logger.log(lvl, "%s = %s", k.decode('utf8'), v.decode('utf8'))
        logger.log(lvl, "***** END DUMP TAGS FOR FILE %s *****", self.filepath)

    def write_tags_in_a_file(self, filepath):
        """Write tags in a utf8 file.

        One "key = value" line is written for each tag (sorted by key).

        Args:
            filepath: filepath of the file to write in.

        """
        with codecs.open(filepath, "w", "utf8") as f:
            for key, value in sorted(self.tags.items()):
                f.write(key.decode('utf8'))
                f.write(" = ")
                f.write(value.decode('utf8'))
                f.write("\n")

    def _hardlink_move_or_copy(self, new_filepath, hardlink_mode=True,
                               tmp_suffix=".t", chmod_mode_int=None):
        """Hardlink/move the file and fallback to a copy in case of failure.

        This is the common implementation of hardlink_or_copy() and
        move_or_copy() methods. No exception is raised.

        Args:
            new_filepath (string): complete target filepath.
            hardlink_mode (boolean): if True, try a hardlink first (the
                original file is kept), if False, try a move first (the
                original file is removed and the current object follows
                the file to its new path).
            tmp_suffix (string): temporary suffix during hardlink/copy
                (None means no temporary suffix).
            chmod_mode_int (integer): if set, the hardlink/move is not tried
                at all and this mode is given to the copy() method.

        Returns:
            (boolean, boolean): first boolean is True if the operation was ok,
                False else ; second boolean is True if the operation was done
                with a hardlink/move, False if it was done with a copy.

        """
        old_filepath = self.filepath
        if chmod_mode_int is None:
            try:
                if hardlink_mode:
                    self.hard_link(new_filepath, tmp_suffix=tmp_suffix)
                else:
                    self.rename(new_filepath)
                return (True, True)
            except Exception as e:
                # expected for instance between two different filesystems
                self.logger.debug("can't %s %s into %s (%s) => "
                                  "falling back to a copy",
                                  "hardlink" if hardlink_mode else "move",
                                  old_filepath, new_filepath, e)
        try:
            new_xaf = self.copy(new_filepath, tmp_suffix=tmp_suffix,
                                chmod_mode_int=chmod_mode_int)
            if not hardlink_mode:
                # "move" by copy => the original file has to be removed
                # (with the same redis configuration than the current object)
                old_xaf = XattrFile(
                    old_filepath,
                    get_redis_callable=self.get_redis_callable,
                    redis_timeout=self.redis_timeout)
                if not old_xaf.delete_or_nothing():
                    self.logger.warning("can't delete %s", old_filepath)
                    return (False, False)
                # The current object follows the file (its tags were
                # committed under the new path during the copy)
                self.__set_filepath(new_xaf.filepath)
            return (True, False)
        except Exception:
            if hardlink_mode:
                self.logger.warning("can't hardlink/copy %s into %s",
                                    old_filepath,
                                    new_filepath)
            else:
                self.logger.warning("can't move/copy %s into %s", old_filepath,
                                    new_filepath)
        return (False, False)

    def move_or_copy(self, new_filepath, tmp_suffix=".t", chmod_mode_int=None):
        """Move or copy (only if move failed) without any exceptions.

        The original file (and its tags) is deleted (whatever move or copy
        is effectively done) and the current object is renamed to new filepath.

        Args:
            new_filepath (string): complete new filepath towards move/copy.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).
            chmod_mode_int (integer): DEPRECATED (do not use).

        Returns:
            (boolean, boolean): first boolean is True if the operation was ok,
                False else ; second boolean is True if the operation was done
                with a move, False if the operation was done with a copy.

        """
        return self._hardlink_move_or_copy(new_filepath, tmp_suffix=tmp_suffix,
                                           chmod_mode_int=chmod_mode_int,
                                           hardlink_mode=False)

    def hardlink_or_copy(self, new_filepath, tmp_suffix=".t",
                         chmod_mode_int=None):
        """Hardlink or copy (only if hardlink failed) without raising exceptions.

        The original file (and its tags) is kept intact and the current
        object is not modified.

        Args:
            new_filepath (string): complete new filepath towards hardlink/copy.
            tmp_suffix (string): temporary suffix during hardlink/copy
                (None means no temporary suffix).
            chmod_mode_int (integer): DEPRECATED (do not use).

        Returns:
            (boolean, boolean): first boolean is True if the operation was ok,
                False else ; second boolean is True if the operation was done
                with a hardlink, False if the operation was done with a copy.

        """
        return self._hardlink_move_or_copy(
            new_filepath, tmp_suffix=tmp_suffix,
            chmod_mode_int=chmod_mode_int,
            hardlink_mode=True)

    def delete_or_nothing(self):
        """Delete the file and corresponding tags.

        In case of errors, in contrast to delete() method, no exception
        is raised.

        Returns:
            boolean: True if the delete was ok, False else.

        """
        try:
            self.delete()
        except Exception as e:
            # best effort by design: the failure is only traced
            self.logger.debug("can't delete %s: %s", self.filepath, e)
            return False
        return True

    def copy_or_nothing(self, new_filepath, tmp_suffix=".t",
                        chmod_mode_int=None):
        """Copy a file without raising exceptions.

        In case of errors, in contrast to copy() method, no exception
        is raised.

        Args:
            new_filepath (string): filepath to copy on.
            tmp_suffix (string): temporary suffix during copy
                (None means no temporary suffix).
            chmod_mode_int (integer): if set, chmod mode as integer
                (not octal !) (int('0755', 8) for example to get the integer
                value of well known '0755' octal value).

        Returns:
            boolean: True if the copy was ok, False else.

        """
        try:
            self.copy(new_filepath, tmp_suffix=tmp_suffix,
                      chmod_mode_int=chmod_mode_int)
        except Exception as e:
            self.logger.error("can't copy file to %s: %s", new_filepath, e)
            return False
        return True