Skip to content

Commit 60a0570

Browse files
authored
Update sensory agents related to the endpoints changes. (#108)
1 parent 1b1243e commit 60a0570

8 files changed

Lines changed: 91 additions & 22 deletions

File tree

.github/workflows/ubuntu.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
runs-on: ${{ matrix.os }}
1818
strategy:
1919
matrix:
20-
os: [ubuntu-20.04, ubuntu-latest]
20+
os: [ubuntu-latest]
2121

2222
steps:
2323
- name: Checkout

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ _Embodied Agents are not yet capable of learning from in-context experience_:
148148

149149
- [OpenVLA](https://api.mbodi.ai/community-models/)
150150
- [Sensory Tools](https://api.mbodi.ai/sense/)
151-
- [Embodied AI Playground](https://api.mbodi.ai/benchmark/)
152151

153152
### Roadmap
154153

@@ -157,6 +156,7 @@ _Embodied Agents are not yet capable of learning from in-context experience_:
157156
- [x] Yolo, SAM2, DepthAnything Sensory Agents
158157
- [x] Auto Agent
159158
- [x] Google Gemini Backend
159+
- [ ] Pi0 Motor Agent
160160
- [ ] ROS integration
161161
- [ ] More Motor Agents, i.e. RT1
162162
- [ ] More device support, i.e. OpenCV camera

mbodied/agents/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,9 @@ def estimate_depth(ctx, image_filename, model_src, api_name, list, help) -> None
391391
DepthEstimationAgent = smart_import("mbodied.agents.sense", attribute="DepthEstimationAgent")
392392
image = Image(path=image_filename, size=(224, 224))
393393
agent: "DepthEstimationAgent" = DepthEstimationAgent(model_src=model_src)
394-
result = agent.act(image=image, api_name=api_name)
394+
result, depth_array = agent.act(image=image, api_name=api_name)
395395
result.pil.show()
396+
print("Depth array shape", depth_array.shape)
396397

397398

398399
@sense.command("segment")

mbodied/agents/sense/depth_estimation_agent.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import numpy as np
16+
1517
from mbodied.agents.sense.sensory_agent import SensoryAgent
1618
from mbodied.types.sense.vision import Image
1719

@@ -49,12 +51,13 @@ def act(self, image: Image, *args, api_name: str = "/depth", **kwargs) -> Image:
4951
"""
5052
if self.actor is None:
5153
raise ValueError("Remote actor for agent not initialized.")
52-
response = self.actor.predict(image.base64, *args, api_name=api_name, **kwargs)
53-
return Image(response)
54+
response, depth_file = self.actor.predict(image.base64, *args, api_name=api_name, **kwargs)
55+
return Image(response), np.load(depth_file)
5456

5557

5658
# Example usage:
5759
if __name__ == "__main__":
5860
agent = DepthEstimationAgent(model_src="https://api.mbodi.ai/sense/")
59-
result = agent.act(image=Image("resources/bridge_example.jpeg"))
61+
result, depth_array = agent.act(image=Image("resources/bridge_example.jpeg"))
6062
result.pil.show()
63+
print("Depth array shape", depth_array.shape)

mbodied/agents/sense/segmentation_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,11 @@ def act(
5959
else:
6060
raise ValueError("Unsupported input type. Must be BBox2D, List[BBox2D], or PixelCoords.")
6161

62-
segmented_image, masks = self.actor.predict(
62+
segmented_image, masks_file = self.actor.predict(
6363
image.base64, input_type, input_data_str, *args, api_name=api_name, **kwargs
6464
)
6565
# Convert gradio Dataframe numpy to numpy array.
66-
masks = np.array(masks["data"])
66+
masks = np.load(masks_file)
6767
return Image(segmented_image), masks
6868

6969

tests/test_auto_agent.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,19 @@
55
from mbodied.agents.motion.openvla_agent import OpenVlaAgent
66
from mbodied.agents.backends.gradio_backend import GradioBackend
77
from mbodied.agents.auto.auto_agent import AutoAgent, get_agent
8+
import numpy as np
9+
import tempfile
10+
import base64
11+
from io import BytesIO
12+
from PIL import Image as PILImage
13+
14+
15+
def get_dummy_base64_image():
16+
"""Create a minimal valid base64 image for testing."""
17+
img = PILImage.new("RGB", (10, 10), color="red")
18+
buffered = BytesIO()
19+
img.save(buffered, format="JPEG")
20+
return base64.b64encode(buffered.getvalue()).decode()
821

922

1023
@pytest.fixture
@@ -56,22 +69,32 @@ def test_auto_openvla_agent_act(auto_openvla_agent_get_method):
5669

5770
@pytest.fixture
5871
def mock_depth_gradio_backend():
72+
# Create a temp file that would be a valid location for a numpy file
73+
temp_file = tempfile.NamedTemporaryFile(suffix=".npy", delete=False)
74+
temp_path = temp_file.name
75+
temp_file.close()
76+
dummy_image_b64 = get_dummy_base64_image()
77+
5978
with patch.object(GradioBackend, "__init__", lambda x, model_src=None, **kwargs: None):
60-
with patch.object(GradioBackend, "predict", return_value=Image(size=(224, 224))):
61-
yield GradioBackend(endpoint="http://1.2.3.4:1234")
79+
# Return a valid base64 image string and a path
80+
with patch.object(GradioBackend, "predict", return_value=(dummy_image_b64, temp_path)):
81+
# Intercept np.load calls to avoid actual file system access
82+
with patch("numpy.load", return_value=np.zeros((224, 224))):
83+
yield GradioBackend(endpoint="http://1.2.3.4:1234")
6284

6385

6486
@pytest.fixture
6587
def depth_agent(mock_depth_gradio_backend):
6688
agent = AutoAgent(task="sense-depth-estimation", model_src="http://1.2.3.4:1234/")
67-
agent.actor = mock_openvla_gradio_backend
89+
agent.actor = mock_depth_gradio_backend
6890
return agent
6991

7092

7193
def test_auto_depth_agent_act(depth_agent):
7294
mock_image = MagicMock(spec=Image)
7395
mock_image.base64 = "base64encodedimage"
7496

75-
result = depth_agent.act(mock_image)
97+
result, depth_array = depth_agent.act(mock_image)
7698

7799
assert isinstance(result, Image)
100+
assert isinstance(depth_array, np.ndarray)

tests/test_depth_agent.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,37 @@
11
import pytest
22
from unittest.mock import patch, MagicMock
3+
import numpy as np
4+
import tempfile
5+
import base64
6+
from io import BytesIO
7+
from PIL import Image as PILImage
38
from mbodied.types.sense.vision import Image
49
from mbodied.agents.sense.depth_estimation_agent import DepthEstimationAgent
5-
from mbodied.agents.sense.object_detection_agent import ObjectDetectionAgent
610
from mbodied.agents.backends.gradio_backend import GradioBackend
711

812

13+
def get_dummy_base64_image():
14+
"""Create a minimal valid base64 image for testing."""
15+
img = PILImage.new("RGB", (10, 10), color="red")
16+
buffered = BytesIO()
17+
img.save(buffered, format="JPEG")
18+
return base64.b64encode(buffered.getvalue()).decode()
19+
20+
921
@pytest.fixture
1022
def mock_gradio_backend():
23+
# Create a temp file that would be a valid location for a numpy file
24+
temp_file = tempfile.NamedTemporaryFile(suffix=".npy", delete=False)
25+
temp_path = temp_file.name
26+
temp_file.close()
27+
dummy_image_b64 = get_dummy_base64_image()
28+
1129
with patch.object(GradioBackend, "__init__", lambda x, model_src=None, **kwargs: None):
12-
with patch.object(GradioBackend, "predict", return_value=Image(size=(224, 224))):
13-
yield GradioBackend(endpoint="http://1.2.3.4:1234")
30+
# Return a valid base64 image string and a path
31+
with patch.object(GradioBackend, "predict", return_value=(dummy_image_b64, temp_path)):
32+
# Intercept np.load calls to avoid actual file system access
33+
with patch("numpy.load", return_value=np.zeros((224, 224))):
34+
yield GradioBackend(endpoint="http://1.2.3.4:1234")
1435

1536

1637
@pytest.fixture
@@ -29,14 +50,16 @@ def test_depth_agent_act(depth_agent):
2950
mock_image = MagicMock(spec=Image)
3051
mock_image.base64 = "base64encodedimage"
3152

32-
result = depth_agent.act(mock_image)
53+
result_img, depth_array = depth_agent.act(mock_image)
3354

34-
assert isinstance(result, Image)
55+
assert isinstance(result_img, Image)
56+
assert isinstance(depth_array, np.ndarray)
3557

3658

3759
@pytest.mark.network
3860
def test_real_depth_agent_act():
3961
# Make real network call.
4062
agent = DepthEstimationAgent(model_src="https://api.mbodi.ai/sense/")
41-
result = agent.act(image=Image("resources/xarm.jpeg", size=(224, 224)))
42-
assert isinstance(result, Image)
63+
result_img, depth_array = agent.act(image=Image("resources/xarm.jpeg", size=(224, 224)))
64+
assert isinstance(result_img, Image)
65+
assert isinstance(depth_array, np.ndarray)

tests/test_segmentation_agent.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,42 @@
11
import pytest
22
import numpy as np
3+
import tempfile
4+
import base64
5+
from io import BytesIO
6+
from PIL import Image as PILImage
37
from unittest.mock import patch, MagicMock
48
from mbodied.types.sense.world import BBox2D, PixelCoords
59
from mbodied.types.sense.vision import Image
610
from mbodied.agents.sense.segmentation_agent import SegmentationAgent
711

812

13+
def get_dummy_base64_image():
14+
"""Create a minimal valid base64 image for testing."""
15+
img = PILImage.new("RGB", (10, 10), color="red")
16+
buffered = BytesIO()
17+
img.save(buffered, format="JPEG")
18+
return base64.b64encode(buffered.getvalue()).decode()
19+
20+
921
@pytest.fixture
1022
def mock_gradio_backend():
23+
# Create a temp file that would be a valid location for a numpy file
24+
temp_file = tempfile.NamedTemporaryFile(suffix=".npy", delete=False)
25+
temp_path = temp_file.name
26+
temp_file.close()
27+
dummy_image_b64 = get_dummy_base64_image()
28+
1129
with patch(
1230
"mbodied.agents.backends.gradio_backend.GradioBackend.__init__", lambda x, model_src=None, **kwargs: None
1331
):
1432
with patch(
1533
"mbodied.agents.backends.gradio_backend.GradioBackend.predict",
16-
return_value=(Image((224, 224)), {"data": [[0]]}),
34+
return_value=(dummy_image_b64, temp_path),
1735
):
18-
from mbodied.agents.backends.gradio_backend import GradioBackend
36+
with patch("numpy.load", return_value=np.zeros((224, 224, 1))):
37+
from mbodied.agents.backends.gradio_backend import GradioBackend
1938

20-
yield GradioBackend(endpoint="http://1.2.3.4:1234")
39+
yield GradioBackend(endpoint="http://1.2.3.4:1234")
2140

2241

2342
@pytest.fixture

0 commit comments

Comments
 (0)