Skip to content

Commit 68ca998

Browse files
committed
refactor: add parser result dispatcher
1 parent 03f3c9b commit 68ca998

2 files changed

Lines changed: 254 additions & 0 deletions

File tree

feapder/core/result_dispatcher.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Parser result routing for runtime parser controls.
4+
"""
5+
from collections.abc import Iterable
6+
from dataclasses import dataclass
7+
8+
from feapder.network.item import Item
9+
from feapder.network.request import Request
10+
11+
12+
@dataclass
13+
class DispatchResult:
14+
del_request_redis_after_item_to_db: bool = False
15+
del_request_redis_after_request_to_db: bool = False
16+
17+
18+
class ResultDispatcher:
19+
REQUEST_RESULT = 1
20+
ITEM_RESULT = 2
21+
22+
def __init__(
23+
self,
24+
*,
25+
request_buffer,
26+
item_buffer,
27+
deal_request,
28+
sync_request_factory=None,
29+
allow_callable=True,
30+
):
31+
self._request_buffer = request_buffer
32+
self._item_buffer = item_buffer
33+
self._deal_request = deal_request
34+
self._sync_request_factory = sync_request_factory or (lambda request: request)
35+
self._allow_callable = allow_callable
36+
37+
def dispatch(self, parser, request, results):
38+
if results and not isinstance(results, Iterable):
39+
raise Exception(
40+
"%s.%s返回值必须可迭代" % (parser.name, request.callback or "parse")
41+
)
42+
43+
dispatch_result = DispatchResult()
44+
result_type = 0
45+
46+
for result in results or []:
47+
if isinstance(result, Request):
48+
result_type = self.REQUEST_RESULT
49+
result.parser_name = result.parser_name or parser.name
50+
if result.request_sync:
51+
self._deal_request(self._sync_request_factory(result))
52+
else:
53+
self._request_buffer.put_request(result)
54+
dispatch_result.del_request_redis_after_request_to_db = True
55+
56+
elif isinstance(result, Item):
57+
result_type = self.ITEM_RESULT
58+
self._item_buffer.put_item(result)
59+
dispatch_result.del_request_redis_after_item_to_db = True
60+
61+
elif callable(result) and self._allow_callable:
62+
if result_type == self.ITEM_RESULT:
63+
self._item_buffer.put_item(result)
64+
dispatch_result.del_request_redis_after_item_to_db = True
65+
else:
66+
self._request_buffer.put_request(result)
67+
dispatch_result.del_request_redis_after_request_to_db = True
68+
69+
elif result is not None:
70+
raise TypeError(self._format_type_error(parser, request, result))
71+
72+
return dispatch_result
73+
74+
def _format_type_error(self, parser, request, result):
75+
callback_name = (
76+
request.callback
77+
and callable(request.callback)
78+
and getattr(request.callback, "__name__")
79+
or request.callback
80+
) or "parse"
81+
expected = "Request、Item or callback" if self._allow_callable else "Request or Item"
82+
return (
83+
f"{parser.name}.{callback_name} result expect {expected}, "
84+
f"bug get type: {type(result)}"
85+
)

tests/test_result_dispatcher.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
import pytest
2+
3+
from feapder.core.result_dispatcher import ResultDispatcher
4+
from feapder.network.item import Item, UpdateItem
5+
from feapder.network.request import Request
6+
7+
8+
class FakeParser:
9+
name = "FakeParser"
10+
11+
12+
class FakeRequestBuffer:
13+
def __init__(self):
14+
self.requests = []
15+
16+
def put_request(self, request):
17+
self.requests.append(request)
18+
19+
20+
class FakeItemBuffer:
21+
def __init__(self):
22+
self.items = []
23+
24+
def put_item(self, item):
25+
self.items.append(item)
26+
27+
28+
def test_dispatcher_routes_async_request_and_sets_parser_name():
29+
request_buffer = FakeRequestBuffer()
30+
item_buffer = FakeItemBuffer()
31+
calls = []
32+
dispatcher = ResultDispatcher(
33+
request_buffer=request_buffer,
34+
item_buffer=item_buffer,
35+
deal_request=calls.append,
36+
sync_request_factory=lambda request: {"request_obj": request, "request_redis": None},
37+
)
38+
next_request = Request("https://example.com")
39+
40+
result = dispatcher.dispatch(
41+
parser=FakeParser(),
42+
request=Request("https://root.example.com", callback="parse"),
43+
results=[next_request],
44+
)
45+
46+
assert next_request.parser_name == "FakeParser"
47+
assert request_buffer.requests == [next_request]
48+
assert item_buffer.items == []
49+
assert calls == []
50+
assert result.del_request_redis_after_request_to_db is True
51+
assert result.del_request_redis_after_item_to_db is False
52+
53+
54+
def test_dispatcher_routes_sync_request_to_deal_request():
55+
request_buffer = FakeRequestBuffer()
56+
item_buffer = FakeItemBuffer()
57+
calls = []
58+
dispatcher = ResultDispatcher(
59+
request_buffer=request_buffer,
60+
item_buffer=item_buffer,
61+
deal_request=calls.append,
62+
sync_request_factory=lambda request: {"request_obj": request, "request_redis": None},
63+
)
64+
next_request = Request("https://example.com", request_sync=True)
65+
66+
result = dispatcher.dispatch(
67+
parser=FakeParser(),
68+
request=Request("https://root.example.com"),
69+
results=[next_request],
70+
)
71+
72+
assert calls == [{"request_obj": next_request, "request_redis": None}]
73+
assert request_buffer.requests == []
74+
assert result.del_request_redis_after_request_to_db is False
75+
76+
77+
def test_dispatcher_routes_item_and_update_item_to_item_buffer():
78+
request_buffer = FakeRequestBuffer()
79+
item_buffer = FakeItemBuffer()
80+
dispatcher = ResultDispatcher(
81+
request_buffer=request_buffer,
82+
item_buffer=item_buffer,
83+
deal_request=lambda request: None,
84+
)
85+
item = Item(title="one")
86+
update_item = UpdateItem(id=1, title="two")
87+
88+
result = dispatcher.dispatch(
89+
parser=FakeParser(),
90+
request=Request("https://root.example.com"),
91+
results=[item, update_item],
92+
)
93+
94+
assert item_buffer.items == [item, update_item]
95+
assert request_buffer.requests == []
96+
assert result.del_request_redis_after_item_to_db is True
97+
98+
99+
def test_dispatcher_routes_callable_after_item_to_item_buffer():
100+
request_buffer = FakeRequestBuffer()
101+
item_buffer = FakeItemBuffer()
102+
dispatcher = ResultDispatcher(
103+
request_buffer=request_buffer,
104+
item_buffer=item_buffer,
105+
deal_request=lambda request: None,
106+
)
107+
callback = lambda: None
108+
109+
result = dispatcher.dispatch(
110+
parser=FakeParser(),
111+
request=Request("https://root.example.com"),
112+
results=[Item(title="one"), callback],
113+
)
114+
115+
assert item_buffer.items[-1] is callback
116+
assert request_buffer.requests == []
117+
assert result.del_request_redis_after_item_to_db is True
118+
119+
120+
def test_dispatcher_routes_callable_without_prior_item_to_request_buffer():
121+
request_buffer = FakeRequestBuffer()
122+
item_buffer = FakeItemBuffer()
123+
dispatcher = ResultDispatcher(
124+
request_buffer=request_buffer,
125+
item_buffer=item_buffer,
126+
deal_request=lambda request: None,
127+
)
128+
callback = lambda: None
129+
130+
result = dispatcher.dispatch(
131+
parser=FakeParser(),
132+
request=Request("https://root.example.com"),
133+
results=[callback],
134+
)
135+
136+
assert request_buffer.requests == [callback]
137+
assert item_buffer.items == []
138+
assert result.del_request_redis_after_request_to_db is True
139+
140+
141+
def test_dispatcher_rejects_callable_when_disabled():
142+
dispatcher = ResultDispatcher(
143+
request_buffer=FakeRequestBuffer(),
144+
item_buffer=FakeItemBuffer(),
145+
deal_request=lambda request: None,
146+
allow_callable=False,
147+
)
148+
149+
with pytest.raises(TypeError, match="FakeParser.parse result expect Request or Item"):
150+
dispatcher.dispatch(
151+
parser=FakeParser(),
152+
request=Request("https://root.example.com"),
153+
results=[lambda: None],
154+
)
155+
156+
157+
def test_dispatcher_rejects_invalid_result_type():
158+
dispatcher = ResultDispatcher(
159+
request_buffer=FakeRequestBuffer(),
160+
item_buffer=FakeItemBuffer(),
161+
deal_request=lambda request: None,
162+
)
163+
164+
with pytest.raises(TypeError, match="FakeParser.parse result expect Request"):
165+
dispatcher.dispatch(
166+
parser=FakeParser(),
167+
request=Request("https://root.example.com"),
168+
results=[object()],
169+
)

0 commit comments

Comments
 (0)