-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathsampler.py
More file actions
119 lines (84 loc) · 3.6 KB
/
Copy pathsampler.py
File metadata and controls
119 lines (84 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*- coding: utf-8 -*-
# Copyright (c) 2015 BigML, Inc
# All rights reserved.
"""flatline.sampler
Working locally with Flatline over dataset samples.
:author: jao <jao@bigml.com>
:date: Mon Apr 06, 2015 04:14
"""
import interpreter
import bigml.api as api
import os
## local testing
# Import-time side effect: switch off the warnings urllib3 would otherwise
# emit, using the urllib3 package reachable through requests.packages.
import requests.packages.urllib3
requests.packages.urllib3.disable_warnings()
class Sampler:
    """Automates sampling a dataset and running Flatline over the sample.

    A subset of the dataset's rows is downloaded (using BigML's sample
    resources) and remembered as the current sample; any desired
    Flatline generator can then be applied to those rows.

    Example:

        sampler = Sampler()
        sampler.take_sample('dataset/54e374ab67dc09706d000283', size=4)
        sampler.apply_lisp('(+ (f 0) (f 1))')

    """

    # A single interpreter, built when the class body is evaluated and
    # shared by every Sampler instance.
    _interpreter = interpreter.Interpreter()

    def __init__(self, username=None, api_key=None, bigml=None):
        """Build a Sampler bound to a BigML connection.

        Pass an existing bigml.api.BigML connection as 'bigml' to reuse
        it; 'username' and 'api_key' are then ignored.  Otherwise a new
        connection is opened with the given 'username' and 'api_key',
        and any of the two that is missing (or empty) is read from the
        environment variables BIGML_USERNAME and BIGML_API_KEY
        respectively.  A KeyError is raised when a needed environment
        variable is not set.

        The new instance starts without a current sample.
        """
        if bigml is not None:
            connection = bigml
        else:
            # Explicit arguments win; the environment is only consulted
            # for credentials that were not supplied.
            user = username or os.environ['BIGML_USERNAME']
            key = api_key or os.environ['BIGML_API_KEY']
            connection = api.BigML(username=user, api_key=key)
        self._bigml = connection
        self._sample = None

    def take_sample(self, dataset_id, size=10):
        """Fetch a sample of the dataset 'dataset_id' and make it current.

        'size' is the requested number of rows.  The previous current
        sample, if any, is replaced.
        """
        created = self._bigml.create_sample(dataset_id)
        query = "limit=-1&rows=%d" % size
        self._sample = self._bigml.check_resource(created['resource'],
                                                  query_string=query)

    def sample(self):
        """Return the dictionary of properties of the current sample.

        An empty dictionary is returned when no sample has been taken
        yet; call 'take_sample' to set or refresh the current sample.
        """
        current = self._sample
        if current is not None:
            return current['object']['sample']
        return {}

    def rows(self):
        """Return the current sample's rows, a list of lists.

        The value is None when the current sample has no 'rows' entry,
        which includes the case where no sample has been taken.  See
        'take_sample' to update the current sample and 'sample' for the
        full set of its properties.
        """
        properties = self.sample()
        return properties.get('rows')

    def fields(self):
        """Return the list of field descriptors of the current sample.

        The value is None when the current sample has no 'fields'
        entry, which includes the case where no sample has been taken.
        See 'take_sample' to update the current sample and 'sample' for
        the full set of its properties.
        """
        properties = self.sample()
        return properties.get('fields')

    def apply_lisp(self, sexp):
        """Apply the lisp s-expression 'sexp' to the current sample's rows.

        'sexp' is a string.  On success, the new rows generated by
        'sexp' are returned as a list of lists of native Python values.
        The input rows used are the ones returned by 'rows'.
        """
        input_rows = self.rows()
        properties = self.sample()
        return self._interpreter.apply_lisp(sexp, input_rows, properties)

    def apply_json(self, json_sexp):
        """Apply a JSON s-expression to the current sample's rows.

        The JSON s-expression must be represented as a Python list
        convertible to JSON, e.g. ["+", 1, ["f", "000000"]].
        """
        input_rows = self.rows()
        properties = self.sample()
        return self._interpreter.apply_json(json_sexp, input_rows, properties)