Skip to content

Commit 19841a1

Browse files
committed
Added new codec: gzip
1 parent 90fd7e1 commit 19841a1

3 files changed

Lines changed: 63 additions & 0 deletions

File tree

codext/others/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: UTF-8 -*-
22
from .dna import *
3+
from .gzipp import *
34
from .html import *
45
from .letters import *
56
from .markdown import *

codext/others/gzipp.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# -*- coding: UTF-8 -*-
2+
"""Gzip Codec - gzip content compression.
3+
4+
NB: Not an encoding properly speaking.
5+
6+
This codec:
7+
- en/decodes strings from str to str
8+
- en/decodes strings from bytes to bytes
9+
- decodes file content to str (read)
10+
- encodes file content from str to bytes (write)
11+
"""
12+
import zlib
13+
from gzip import GzipFile
14+
15+
from ..__common__ import *
16+
17+
18+
# Sample inputs registered under the 'enc-dec(gzip)' key — presumably consumed
# by the package's automated encode/decode checks (TODO confirm in __common__).
__examples__ = {'enc-dec(gzip)': ["test", "This is a test"]}
19+
# Codec names declared by this module — presumably the ones offered to the
# package's codec-guessing feature (TODO confirm in __common__).
__guess__ = ["gzip"]
20+
21+
22+
def gzip_encode(text, errors="strict"):
    """Compress *text* into a GZip stream.

    :param text:   input to compress; converted to bytes with b() before writing
    :param errors: error handling scheme (accepted for interface compatibility,
                   not used by the compression itself)
    :return:       tuple (GZip-compressed bytes, length of the input)
    """
    buffer = BytesIO()
    # GzipFile must be closed before reading the buffer back so that the
    # trailer is flushed; leaving the with-block takes care of that.
    with GzipFile(fileobj=buffer, mode="wb") as stream:
        payload = b(text)
        stream.write(payload)
    compressed = buffer.getvalue()
    return compressed, len(text)
27+
28+
29+
def gzip_decode(data, errors="strict"):
    """Decompress GZip-compressed *data*.

    Decompression is first attempted through GzipFile; zlib.decompress is used
    as a fallback only when that first attempt fails.

    :param data:   compressed input; converted to bytes with b() before decoding
    :param errors: error handling scheme forwarded to handle_error() when both
                   decompression attempts fail
    :return:       on success, tuple (decompressed bytes, length of the
                   decompressed bytes); on failure, tuple (value returned by the
                   error handler for the first input item, or "" for empty
                   input, length of the input)
    """
    # NOTE(review): on success the second tuple item is the length of the
    # decoded output, not of the consumed input - confirm this is what the
    # callers expect.
    try:
        # first try decompressing as a regular GZip file
        with GzipFile(fileobj=BytesIO(b(data)), mode="rb") as f:
            r = f.read()
    except Exception:
        # The fallback lives inside this handler on purpose: a result obtained
        # from GzipFile must not be overwritten (or turned into an error) by a
        # second decompression attempt.
        try:
            # 16 + MAX_WBITS tells zlib to process the GZip container itself
            r = zlib.decompress(b(data), 16 + zlib.MAX_WBITS)
        except Exception:
            # both attempts failed; delegate to the configured error handler,
            # which is only invoked when there is at least one input item
            if len(data) > 0:
                result = handle_error("gzip", errors, decode=True)(data[0], 0)
            else:
                result = ""
            return result, len(data)
    return r, len(r)
42+
43+
44+
# Register the codec under the name "gzip" with its encode and decode functions
# (add is not defined in this module; presumably provided by ..__common__).
add("gzip", gzip_encode, gzip_decode)
45+

docs/enc/others.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,23 @@ CACTCGGTCGGCCATATGTTCGGCCATATGTTCGTCTGTTCACTCGCCCATACACT
2929

3030
-----
3131

32+
### GZip
33+
34+
This is, of course, not an encoding properly speaking, but it is implemented for the sake of convenience.
35+
36+
**Codec** | **Conversions** | **Aliases** | **Comment**
37+
:---: | :---: | --- | ---
38+
`gzip` | data <-> GZipped data | | decoding is attempted both with and without the file signature
39+
40+
```python
41+
>>> codext.encode('test', "gzip")
42+
'\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00'
43+
>>> codext.decode('\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00', "gzip")
44+
'test'
45+
```
46+
47+
-----
48+
3249
### HTML Entities
3350

3451
This implements the full list of characters available at [this reference](https://dev.w3.org/html5/html-author/charref).

0 commit comments

Comments
 (0)