Added new codec: gzip

dhondta · dhondta · commit 19841a1f4c5e · 2020-12-17T16:03:56.000+01:00
diff --git a/codext/others/__init__.py b/codext/others/__init__.py
@@ -1,5 +1,6 @@
 # -*- coding: UTF-8 -*-
 from .dna import *
+from .gzipp import *
 from .html import *
 from .letters import *
 from .markdown import *
diff --git a/codext/others/gzipp.py b/codext/others/gzipp.py
@@ -0,0 +1,45 @@
+# -*- coding: UTF-8 -*-
+"""Gzip Codec - gzip content compression.
+
+NB: Not an encoding properly speaking.
+
+This codec:
+- en/decodes strings from str to str
+- en/decodes strings from bytes to bytes
+- decodes file content to str (read)
+- encodes file content from str to bytes (write)
+"""
+import zlib
+from gzip import GzipFile
+
+from ..__common__ import *
+
+
+# Sample inputs for this codec. NOTE(review): the 'enc-dec(gzip)' key presumably asks for an
+#  encode-then-decode round trip on each sample -- confirm against the consumer of __examples__.
+__examples__ = {'enc-dec(gzip)': ["test", "This is a test"]}
+# NOTE(review): looks like the codec name(s) exposed to the guessing feature -- confirm.
+__guess__ = ["gzip"]
+
+
+def gzip_encode(text, errors="strict"):
+    out = BytesIO()
+    with GzipFile(fileobj=out, mode="wb") as f:
+        f.write(b(text))
+    return out.getvalue(), len(text)
+
+
+def gzip_decode(data, errors="strict"):
+    # then try decompressing considering the file signature
+    try:
+        with GzipFile(fileobj=BytesIO(b(data)), mode="rb") as f:
+            r = f.read()
+    except:
+        pass
+    # try decompressing without considering the file signature
+    try:
+        r = zlib.decompress(b(data), 16 + zlib.MAX_WBITS)
+    except:
+        return handle_error("gzip", errors, decode=True)(data[0], 0) if len(data) > 0 else "", len(data)
+    return r, len(r)
+
+
+add("gzip", gzip_encode, gzip_decode)
+
diff --git a/docs/enc/others.md b/docs/enc/others.md
@@ -29,6 +29,23 @@ CACTCGGTCGGCCATATGTTCGGCCATATGTTCGTCTGTTCACTCGCCCATACACT
 
 -----
 
+### GZip
+
+This is, of course, not an encoding properly speaking, but it is implemented for the sake of convenience.
+
+**Codec** | **Conversions** | **Aliases** | **Comment**
+:---: | :---: | --- | ---
+`gzip` | data <-> GZipped data |  | decoding tries with and without the file signature
+
+```python
+>>> codext.encode('test', "gzip")
+'\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00'
+>>> codext.decode('\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00', "gzip")
+'test'
+```
+
+-----
+
 ### HTML Entities
 
 This implements the full list of characters available at [this reference](https://dev.w3.org/html5/html-author/charref).