Skip to content

Commit 83821bd

Browse files
committed
Added expansion factor for guess mode
1 parent 5feb784 commit 83821bd

17 files changed

Lines changed: 63 additions & 20 deletions

File tree

codext/base/_base.py

Lines changed: 36 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,39 @@
1313
from ..__info__ import __version__
1414

1515

16+
"""
17+
Curve fitting used to obtain the coefficients of EXPANSION_FACTOR (mean encoded/original length ratio per base):
18+
19+
>>> import matplotlib.pyplot as plt
20+
>>> import pandas as pd
21+
>>> import scipy.optimize
22+
>>> from statistics import mean
23+
>>> from tinyscript import random
24+
>>> x, y = [], []
25+
>>> for i in range(2, 256):
26+
v = []
27+
for j in range(16, 2048, 16):
28+
s = random.randstr(j)
29+
v.append(float(len(codext.encode(s, "base%d-generic" % i))) / len(s))
30+
x.append(i)
31+
y.append(mean(v))
32+
>>> data = pd.DataFrame({'base': x, 'expf': y})
33+
>>> def fit(x, y, func, params):
34+
params, cv = scipy.optimize.curve_fit(func, x, y, params)
35+
print(params)
36+
y2 = func(x, *params)
37+
plt.clf()
38+
plt.plot(x, y, ".", color="blue", alpha=.3)
39+
plt.plot(x, y2, color="red", linewidth=3.0)
40+
plt.show()
41+
>>> fit(data['base'], data['expf'], lambda x, a, b, c, d: a / (x**b + c) + d, (1, 1, 1, 1))
42+
[ 0.02841434 0.00512664 -0.99999984 0.01543879]
43+
>>> fit(data['base'], data['expf'], lambda x, a, b, c, d: a / (x**b + c) + d, (.028, .005, -1, .015))
44+
[ 0.02827357 0.00510124 -0.99999984 0.01536941]
45+
"""
46+
def EXPANSION_FACTOR(base):
    """Approximate the encoded/original length ratio of a generic base-N encoding.

    Evaluates the fitted model ``a / (base**b + c) + d`` using the coefficients
    reported by the final curve-fitting run shown in the notes above.

    :param base: size of the base (N of "baseN")
    :return:     estimated expansion factor as a float
    """
    scale, exponent, shift, offset = 0.02827357, 0.00510124, 0.99999984, 0.01536941
    denominator = base ** exponent - shift
    return scale / denominator + offset
47+
48+
1649
class BaseError(ValueError):
1750
pass
1851

@@ -144,6 +177,7 @@ def _decode(input, errors="strict"):
144177

145178
kwargs['len_charset'] = n
146179
kwargs['printables_rate'] = float(len([c for c in cs if c in printable])) / len(cs)
180+
kwargs['expansion_factor'] = kwargs.pop('expansion_factor', (EXPANSION_FACTOR(n), .05))
147181
n = "base{}".format(n) if name is None else name
148182
kwargs['guess'] = kwargs.get('guess', [n])
149183
add(n, encode, decode, pattern, entropy=nb, **kwargs)
@@ -167,7 +201,8 @@ def _decode(input, errors="strict"):
167201

168202
add("base", encode, decode, r"^base[-_]?([2-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:[-_]generic)?$",
169203
guess=["base%d-generic" % i for i in range(2, 255)], entropy=lambda e, n: log(int(n.split("-")[0][4:]), 2),
170-
len_charset=lambda n: int(n.split("-")[0][4:]), printables_rate=1., category="base-generic", penalty=.4)
204+
len_charset=lambda n: int(n.split("-")[0][4:]), printables_rate=1., category="base-generic", penalty=.4,
205+
expansion_factor=lambda f, n: (EXPANSION_FACTOR(int(n.split("-")[0][4:])), .05))
171206

172207

173208
def main(n, ref=None, alt=None, inv=True):

codext/crypto/bacon.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,5 +31,6 @@
3131
}
3232

3333

34-
add_map("bacon", ENCMAP, ignore_case="both", pattern=r"bacon(?:(?:ian)?[-_]cipher)?([\-_].{2})?$", printables_rate=1.)
34+
add_map("bacon", ENCMAP, ignore_case="both", pattern=r"bacon(?:(?:ian)?[-_]cipher)?([\-_].{2})?$", expansion_factor=5.,
35+
printables_rate=1.)
3536

codext/crypto/citrix.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,5 +47,6 @@ def decode(text, errors="strict"):
4747
return decode
4848

4949

50-
add("citrix", citrix_encode, citrix_decode, r"citrix(|[-_]?(?:ctx)?1)$", entropy=4., printables_rate=1.)
50+
add("citrix", citrix_encode, citrix_decode, r"citrix(|[-_]?(?:ctx)?1)$", entropy=4., printables_rate=1.,
51+
expansion_factor=2.)
5152

codext/languages/ipsum.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,5 +92,6 @@ def ipsum_decode(text, errors="strict"):
9292
return s, len(text)
9393

9494

95-
add("ipsum", ipsum_encode, ipsum_decode, pattern=r"^(?:lorem[-_]?)?ipsum$", printables_rate=1.)
95+
add("ipsum", ipsum_encode, ipsum_decode, pattern=r"^(?:lorem[-_]?)?ipsum$", printables_rate=1.,
96+
expansion_factor=(6., .5))
9697

codext/languages/leetspeak.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,5 @@
1919
ENCMAP = {k: v for k, v in zip("aabeliostzg", "@4831105729")}
2020

2121

22-
add_map("leet", ENCMAP, ignore_case="encode", no_error=True, pattern=r"(?:leet|1337|leetspeak)$",
23-
entropy=lambda e: e, expansion_factor=1.)
22+
add_map("leet", ENCMAP, ignore_case="encode", no_error=True, pattern=r"(?:leet|1337|leetspeak)$", entropy=lambda e: e)
2423

codext/languages/morse.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,5 +35,6 @@
3535
}
3636

3737

38-
add_map("morse", ENCMAP, "#", " ", ignore_case="encode", pattern=r"^morse([-_]?.{3})?$", printables_rate=1.)
38+
add_map("morse", ENCMAP, "#", " ", ignore_case="encode", pattern=r"^morse([-_]?.{3})?$", printables_rate=1.,
39+
expansion_factor=(2.8, .6))
3940

codext/languages/navajo.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,5 +30,6 @@
3030
}
3131

3232

33-
add_map("navajo", ENCMAP, ignore_case="both", sep=" ", pattern=r"^navajo$", printables_rate=1.)
33+
add_map("navajo", ENCMAP, ignore_case="both", sep=" ", pattern=r"^navajo$", printables_rate=1.,
34+
expansion_factor=(6.2, .8))
3435

codext/languages/radio.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,6 @@
2424
}
2525

2626

27-
add_map("radio", ENCMAP, sep=" ", ignore_case="both",
28-
pattern=r"^(?:military|nato|radio)(?:(?:[-_]phonetic)?(?:[-_]alphabet)?)?$", printables_rate=1.)
27+
add_map("radio", ENCMAP, sep=" ", ignore_case="both", printables_rate=1., expansion_factor=(5.5, .3),
28+
pattern=r"^(?:military|nato|radio)(?:(?:[-_]phonetic)?(?:[-_]alphabet)?)?$")
2929

codext/languages/southpark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,5 +40,5 @@
4040

4141
add_map("southpark", ENCMAP1, pattern=r"^(?:kenny|southpark)([-_].{6})?$", examples=__examples1__, guess=__guess1__)
4242
add_map("southpark-icase", ENCMAP2, ignore_case="both", pattern=r"^(?:kenny|southpark)[-_]icase([-_].{3})?$",
43-
examples=__examples2__, guess=__guess2__, printables_rate=1.)
43+
examples=__examples2__, guess=__guess2__, printables_rate=1., expansion_factor=3.)
4444

codext/languages/tomtom.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,5 +30,6 @@
3030
}
3131

3232

33-
add_map("tom-tom", ENCMAP, ".", " ", ignore_case="both", pattern=r"^tom-?tom([-_]?.{3})?$", printables_rate=1.)
33+
add_map("tom-tom", ENCMAP, ".", " ", ignore_case="both", pattern=r"^tom-?tom([-_]?.{3})?$", printables_rate=1.,
34+
expansion_factor=(3.8, .2))
3435

0 commit comments

Comments
 (0)