[maxscript] KDTree. Can we make it faster?


#6

append tree node function might be cleaner (it’s not faster):

	fn appendTreeNode root node =
	(
		active = true
		while active do
		(
			if node[3][root[4]] <= root[3][root[4]] then 
			(
				if root[1] != undefined then root = root[1] else
				(
					root[1] = node
					active = false
				)
			)
			else
			(
				if root[2] != undefined then root = root[2] else
				(
					root[2] = node
					active = false
				)
			)
		)
		node[4] = if root[4] == 2 then 1 else 2
	),

#7

Sorry to dredge up the thread again, but am I correct in that this would only split on the X and Y axes, and is therefore a K-D tree of only 2 dimensions?

I tried adding the third dimension by adding this method to change the split axis, rather than toggling back and forth on X and Y.

fn nextDimension dim = return 1 + (mod dim 3) as integer

I changed it in the appropriate places:

tree_node[4] = nextDimension potential_parent[4]

Also, in GetNearestNeighbor I send the axis directly:

fn GetNearestNeighbor nodeRoot target_pt axis = ...

and I increment it inside this function also, rather than toggle on state

GetNearestNeighbor nextBranch target_pt (nextDimension axis)

and, of course, use 1 for the initial call instead of a boolean:

fn FindNearestNeighbor pt = ( this.GetNearestNeighbor this.root pt 1 )	

The end of result of all this is… not much noticeable performance improvement. ^^; I did see some gain on initialization (I actually expected the effect to happen on search, not initialization) but not enough to be conclusive. To test in three dimensions instead of two, I replaced the text object with a series of nested GeoSpheres attached into a single Editable Mesh. Here is the result:

– original
KDTree build. Time: 0.081sec. Mem: 548240L Points count:7210
OK
Lookup: Time: 0.025sec. Mem: 480L
Lookup: Time: 0.024sec. Mem: 128L
Lookup: Time: 0.009sec. Mem: 128L
Lookup: Time: 0.024sec. Mem: 128L
Lookup: Time: 0.007sec. Mem: 128L
Lookup: Time: 0.024sec. Mem: 128L
Lookup: Time: 0.006sec. Mem: 128L
Lookup: Time: 0.024sec. Mem: 128L
Lookup: Time: 0.006sec. Mem: 128L
Lookup: Time: 0.024sec. Mem: 128L
Lookup: Time: 0.006sec. Mem: 128L
Lookup: Time: 0.023sec. Mem: 128L
Lookup: Time: 0.007sec. Mem: 128L
Lookup: Time: 0.023sec. Mem: 128L
Lookup: Time: 0.006sec. Mem: 128L
Lookup: Time: 0.024sec. Mem: 128L

– 3d
KDTree build. Time: 0.063sec. Mem: 548440L Points count:7210
OK
Lookup: Time: 0.021sec. Mem: 356L
Lookup: Time: 0.026sec. Mem: 128L
Lookup: Time: 0.023sec. Mem: 128L
Lookup: Time: 0.027sec. Mem: 128L
Lookup: Time: 0.013sec. Mem: 128L
Lookup: Time: 0.026sec. Mem: 128L
Lookup: Time: 0.021sec. Mem: 128L
Lookup: Time: 0.026sec. Mem: 128L
Lookup: Time: 0.022sec. Mem: 128L
Lookup: Time: 0.024sec. Mem: 128L
Lookup: Time: 0.02sec. Mem: 128L
Lookup: Time: 0.026sec. Mem: 128L
Lookup: Time: 0.021sec. Mem: 128L
Lookup: Time: 0.025sec. Mem: 128L
Lookup: Time: 0.021sec. Mem: 128L
Lookup: Time: 0.026sec. Mem: 128L
Lookup: Time: 0.02sec. Mem: 128L

I don’t know if this adds much; I’m actually not sure why the 2D version posted above works seemingly as correctly and efficiently… but just putting it out there. Here’s the whole code with my changes:

(
fn nextDimension dim = return 1 + (mod dim 3) as integer

struct KDTree
(
	root,
	
	fn AppendKDTreeNode root tree_node =
	(
		potential_parent = root
		active = true
		
		while active do
		(
			if tree_node[3][potential_parent[4]] <= potential_parent[3][potential_parent[4]] then
			(			
				if potential_parent[1] != undefined then
				(
					potential_parent = potential_parent[1]
				)
				else
				(
					potential_parent[1] = tree_node
					tree_node[4] = nextDimension potential_parent[4]
					active = false
				)
			)
			else
			(
				if potential_parent[2] != undefined then
				(
					potential_parent = potential_parent[2]
				)
				else
				(
					potential_parent[2] = tree_node
					tree_node[4] = nextDimension potential_parent[4]
					active = false
				)			
			)
		)	
	),

	fn CreateFromPoints points =
	(
		local divideByAxis = 1
		local tree = KDTree root:#( undefined, undefined, points[1], divideByAxis )		
		local tree_root = tree.root
		local append_tree_node = tree.AppendKDTreeNode
		
		for i = 2 to Points.Count do append_tree_node tree_root #( undefined, undefined, points[i], 1 )
		tree
	),
	
	fn Closest n0 n1 target_pt =
	(
		if n0 == undefined then n1 else if n1 == undefined then n0 else
		(
			if distance n0[3] target_pt < distance n1[3] target_pt then n0 else n1
		)		
	),

	fn GetNearestNeighbor nodeRoot target_pt axis = if nodeRoot != undefined do
	(	
		local nextBranch, otherBranch, best
		if target_pt[axis] < nodeRoot[3][axis] then
		(
			nextBranch  = nodeRoot[1]
			otherBranch = nodeRoot[2]
		) 
		else
		(
			nextBranch  = nodeRoot[2]
			otherBranch = nodeRoot[1]
		)
					
		temp = GetNearestNeighbor nextBranch target_pt (nextDimension axis)
		best = Closest temp nodeRoot target_pt
				
		dist = target_pt[axis] - nodeRoot[3][axis]

		if (distance target_pt best[3]) >= dist do
		(
			temp = GetNearestNeighbor otherBranch target_pt (nextDimension axis)
			best = Closest temp best target_pt
		)
		
		best		
	),		
	
	fn FindNearestNeighbor pt = ( this.GetNearestNeighbor this.root pt 1 )	
	
)


delete objects
gc()

txt = editable_mesh()
for i = 1 to 5 do (
	local geo = GeoSphere()
	geo.segs = 12
	geo.radius = i * 5
	addModifier geo (turn_to_mesh())
	maxOps.collapseNode geo true
	attach txt geo
)

maxOps.CollapseNode txt true
txt.xray = true
pts = for i=1 to txt.numverts collect getvert txt i


t1=timestamp();hf = heapfree

::kd = KDTree.CreateFromPoints pts	

format "KDTree build. Time: %sec. Mem: %  Points count:%\n" ((timestamp()-t1)/1000 as float) (hf-heapfree) pts.count


-- testing

delete helpers
::h1 = point centermarker:on cross:off wirecolor:red
::h2 = point pos:txt.center centermarker:off cross:true wirecolor:green isSelected:true

deleteAllChangeHandlers id:#xxx
when transform h2 changes id:#xxx val do
(
	t1=timestamp();hf = heapfree

	n = kd.FindNearestNeighbor ::h2.pos
	
	format "Lookup: Time: %sec. Mem: %\n" ((timestamp()-t1)/1000 as float) (hf-heapfree)
	::h1.pos = [ n[3].x, n[3].y, n[3].z ]
)

select h2
)

#8

Here’s an interesting finding after doing some profiling work… I wrote a basic linear search:

	struct PointSearcher
(
	points = #(),
	
	fn linearSearchIndex target = (
		if points.count < 1 then return undefined
		local closestIndex = 1
		for i = 2 to points.count do (
			if (distance target points[i] < distance target points[closestIndex]) then (
				closestIndex = i
			)
		)
		return closestIndex
	),
	
	fn linearSearch target = (
		if points.count < 1 then return undefined
		return (points[linearSearchIndex target])
	)
)

and for some reason, this turns out to be much faster. I tried it against both my adjustments and the last version posted by @Serejah, and adjusted the time stamp print statement to output the average for all runs - here’s what I got, for 12,180 verts:

k-D lookup: Time: 0.0366896sec. avg
Linear lookup: Time: 0.00656522sec. avg

for 115,210 verts:

k-D lookup: Time: 0.298429sec. avg
Linear lookup: Time: 0.05544sec. avg

for 460,810 verts:

Linear lookup: Time: 0.221226sec.
k-D: (max froze while processing callbacks for maybe a second of dragging)

So, I’d call this a pretty baffling result. Maxscript is pretty mercurial but I suppose what we’re running into is the language overhead mattering more than whatever patterns we come up with? Unfortunate.


#9

Yeah, seems like recursion performance penalty is way too big. But sometimes 2d uniform grid in enough for the task

Time: 0.207sec. Mem: 1936L – PointSearcher
pt: [0.13432,0.92957,0] – avg query

100k points Uniform 2D grid build Time: 1.269sec.
Time: 0.001sec. Mem: 1040L – avg query
pt: [0.13432,0.92957,0]


#10

Interesting - what script are those output logs from?


#11

Not sure if it was the latest version, but anyway. Should be enough for tests


#12

just kd-tree needs to be written right … :nerd_face:


#13

Let’s look at these numbers … We have a model size that is quite normal for current tasks (~ 100K vert). To find the 20 closest neighbors, you need to spend ~1 s. Where 20 verts is a normal amount for soft-selection, for example. Can we live with this? Probably not.


#14

Agreed. Did you spot a bug in the k-D tree implementation, @denisT? With the bottleneck, so far, being mostly due to the environment I am not sure where to start with finding or writing a better option.


#15

this one works better

(
	
struct AutodeskOctree
(
	Points,
	IArray,
	Octree,
	
	fn MakeListOfType type_name = 
	(
		local genericType = dotnet.GetType "System.Collections.Generic.List`1"
		local innerType   = dotnet.GetType type_name
		local specificType = genericType.MakeGenericType #(innerType)

		(dotnetclass "System.Activator").CreateInstance specificType
	),
	
	vec3_class,
	
	fn AsVec3 vec = dotNetObject vec3_class vec.x vec.y vec.z,
		
	fn OctreeFromPoints pts =
	(
		local lst = MakeListOfType "microsoft.xna.framework.vector3"
		local ListAdd = lst.add
		for p in pts do ListAdd (dotNetObject vec3_class p.x p.y p.z)

		local ia = dotnet.GetType (dotNetClass "autodesk.sequences.immutablearray")
		local method = (ia.GetMethods())[ 67 ] -- .Create that takes List<T> as a parameter
		local vec_type = dotNet.getType vec3_class
		local meth = method.makegenericmethod (dotNet.ValueToDotNetObject #( vec_type ) (dotNetClass "system.type[]"))
		
		-- 'IArray<Vector3>'
		IArray = meth.invoke (dotNetObject "system.object") (dotNet.ValueToDotNetObject #(lst) (dotNetClass "system.object[]"))

		Octree = dotNetObject "autodesk.geometry3d.vertexoctree" IArray

		if Octree != undefined do 
		(
			Points = pts
			Octree
		)
		
	),
	
	fn FindClosest pt =
	(
		index = Octree.FindClosestPoint (dotNetObject vec3_class pt.x pt.y pt.z)
		if index < 0 then undefined else Points[ index + 1 ]
	),
	
	on create do
	(
		dotNet.loadAssembly @"C:\Program Files\Autodesk\3ds Max 2016\Geometry3D.dll"
		dotNet.loadAssembly @"C:\Program Files\Autodesk\3ds Max 2016\ImmutableArray.dll"
		dotNet.loadAssembly @"C:\Program Files\Autodesk\3ds Max 2016\MonoGame.Framework.dll"

		vec3_class = dotnetclass "microsoft.xna.framework.vector3"
	)	
)



delete objects
gc()

txt = Text text:"MAXS\r\ncript"
addModifier txt (subdivide())
txt.modifiers[1].size = (distance txt.max txt.min) / 250.0

pts = for i=1 to txt.numverts collect getvert txt i

t1=timestamp();hf = heapfree

oc = AutodeskOctree()
format "1 Time: %sec. Mem: %\n" ((timestamp()-t1)/1000 as float) (hf-heapfree)

oc.OctreeFromPoints pts

format "2 Time: %sec. Mem: %\n" ((timestamp()-t1)/1000 as float) (hf-heapfree)

delete helpers
::h1 = point centermarker:on cross:off wirecolor:red
::h2 = point pos:txt.center centermarker:off cross:true wirecolor:green isSelected:true

deleteAllChangeHandlers id:#xxx
when transform h2 changes id:#xxx val do
(
	t1=timestamp();hf = heapfree

	n = oc.FindClosest ::h2.pos
	
	format "Lookup: Time: %sec. Mem: %\n" ((timestamp()-t1)/1000 as float) (hf-heapfree)
	::h1.pos = n
)

)

#16
--dotNetObject "microsoft.xna.framework.vector3" 1 2 3

--  make it in advance and put to the struct:
vec3_class = dotnetclass "microsoft.xna.framework.vector3" 

dotnetobject vec3_class x y z

#17

Nice, thanks!
8x faster with no effort.

updated the code in previous post

Wonder if it is possible to skip maxscript loop over points at all and somehow cast mesh verts array (from c# IMesh) to Xna.Vector3 array. It should be way faster.


#18
global AutodeskOctree
(
	struct AutodeskOctreeStruct
	(
		points,
		octree,
		
		fn createGenericList class = 
		(
			local genericType = dotnet.GetType "System.Collections.Generic.List`1"
			if iskindof (local elementType = dotnet.GetType class) dotnetobject do
			(
				(dotnetclass "System.Activator").CreateInstance (genericType.MakeGenericType #(elementType))
			)
		),

		vec3_class = dotnetclass "microsoft.xna.framework.vector3",	
		fn as_vec3 vec = dotnetobject vec3_class vec.x vec.y vec.z,
			
		fn create_from_points pp =
		(
	t0 = timestamp()
	h0 = heapfree
			
			list = createGenericList "microsoft.xna.framework.vector3"
			for p in pp do list.add (as_vec3 p)

	t1 = timestamp()
	h1 = heapfree
					
			-- vec3_tab = dotnet.valuetodotnetobject vec3_tab (dotnetclass "system.object[]")

			ia_type = dotnet.gettype (dotnetclass "autodesk.sequences.immutablearray")
			ia_methods = ia_type.getmethods() -- [ 67 ] >> .create that takes list<t> as a parameter
			v_type = dotnet.gettype (dotnetclass "microsoft.xna.framework.vector3")	
			create_method = ia_methods[67].makegenericmethod (dotnet.valuetodotnetobject #(v_type) (dotnetclass "system.type[]"))
			
			-- 'IArray<Vector3>'
			vec3_arr = create_method.invoke (dotnetobject "system.object") (dotnet.valuetodotnetobject #(list) (dotnetclass "system.object[]"))
				
			oc_tree_class = dotnetclass "autodesk.geometry3d.vertexoctree"
			oc_tree = dotnetobject "autodesk.geometry3d.vertexoctree" vec3_arr

	t2 = timestamp()
	h2 = heapfree
			
	format "\t >> time:%(%, %) heap:%(% %)\n" (t2 - t0) (t1 - t0) (t2 - t1) (h0 - h2) (h0 - h1) (h1 - h2)
			
			oc_tree		
		),
			
		fn find_nearest_index p =
		(
			octree.FindClosestPoint (as_vec3 p) + 1
		),
		fn find_nearest_point pt =
		(
			if (i = find_nearest_index p) > 0 do points[i]
		),
		
		on create do
		(
			dotnet.loadassembly @"geometry3d.dll"
			dotnet.loadassembly @"immutablearray.dll"
			dotnet.loadassembly @"monogame.framework.dll"			
		)	
	)

	AutodeskOctree = AutodeskOctreeStruct
	ok
)


delete objects
gc()

pp = 
(
	s = geosphere segs:100 
	addmodifier s (Edit_Poly())
	for k=1 to numpoints s collect (getpointpos s k)
)
	
_oc = AutodeskOctree()
	
(
	t0 = timestamp()
	h0 = heapfree

	if (oc_tree = _oc.create_from_points pp) != undefined do 
	(
		_oc.octree = oc_tree
		_oc.points = deepcopy pp
	)
	format "% time:% heap:%\n" pp.count (timestamp() - t0) (h0 - heapfree)
)	

(
	num = 1000
	v = -1
	index = random 1 oc.points.count
	
	t0 = timestamp()
	h0 = heapfree

	for k=1 to num do
	(
		v = _oc.find_nearest_index pp[index]
	)

	format "(%) closest:(% >> %) time:% heap:%\n" num index v (timestamp() - t0) (h0 - heapfree)
)

Thanks for the nice find with VertexOctree. This is the right direction (MXS + C#)

(I’ve cleaned up your code to “my style” to make it easier for me to debug)


#19

As we can see the only problem is the casting mxs point3 to Vector3 and add the value to List.

And it’s the only problem. All other looks good enough to play with.


#20

For some unknown reason max doesn’t allow to make a IntPtr to the first mesh vert from maxscript.
When I do it from compiled c# dll it works nicely, but I’d love not to compile a thing if it is possible.
ImmutableArray can be created from arrays as well, so probably we can copy whole IPoint3 memory block and use it to make Vec3 array. But how do we add the first point?

g   = (dotNetClass "Autodesk.Max.GlobalInterface").Instance
tri = (createInstance box).mesh
v   = g.executescript (g.stringstream.create "::tri") true
vts = v.ToMesh.Verts

base   = (vts.item 0).INativeObject__Handle  -- max 2014
offset = 12 -- 3 * 4 bytes to skip first Point3 entirely
floats = dotNetObject "system.single[]" (vts.Count * 3)
m = (dotNetClass "System.Runtime.InteropServices.Marshal")
m.copy (dotNetObject "System.IntPtr" (base + offset)) floats 0 (vts.Count * 3) 

for i = 0 to 20 by 3 do format "%: [%,%,%] - %\n" (1 + ((i+3)/3)) (floats.get i) (floats.get (i+1)) (floats.get (i+2)) (getvert tri (1 + (i+3)/3))

#21

this is what i would do …
I would make an assembly to convert IMesh or system.single[] to IArray . Everything else will be slow anyway I guess


#22

Kinda works, but I hate the fact that I have to compile here and there for every little thing

Time: 0.02sec. Mem: 3688L – for converting 9167 point3 to vec3 array


global MemUtils =
(
	source = "using System;\n"
	source += "using System.Runtime.InteropServices;\n"
	source += "class MemUtils\n"
	source += "{\n"
	source += "
	public static T[] MarshalUnmananagedArray2Struct<T>(Int64 unmanagedArray, int length )
	{
		var size = Marshal.SizeOf(typeof(T));
		var mangagedArray = new T[length];

		for (int i = 0; i < length; i++)
		{
			IntPtr ins = new IntPtr(unmanagedArray + i * size);
			mangagedArray[i] = Marshal.PtrToStructure<T>(ins);
		}
	
		return mangagedArray;
	 }
	"
	source += "}\n"

	csharpProvider = dotnetobject "Microsoft.CSharp.CSharpCodeProvider"
	compilerParams = dotnetobject "System.CodeDom.Compiler.CompilerParameters"
	compilerParams.ReferencedAssemblies.Add("System.dll");
	compilerParams.GenerateInMemory = on
	compilerResults = csharpProvider.CompileAssemblyFromSource compilerParams #(source)
	compilerResults.CompiledAssembly.CreateInstance "MemUtils"
)

fn UnmananagedArray2Struct first_item_handle count struct_class =
(
	local mi   = (::MemUtils.gettype()).getmethod "MarshalUnmananagedArray2Struct"
	local meth = mi.makegenericmethod (dotNet.ValueToDotNetObject #(  dotNet.getType struct_class ) (dotNetClass "system.type[]"))
	meth.invoke (dotNetObject "system.object") (dotNet.ValueToDotNetObject #( first_item_handle, count ) (dotNetClass "system.object[]")) asdotnetobject:true
)


tri = (createInstance Teapot).mesh
v   = g.executescript (g.stringstream.create "::tri") true
vts = v.ToMesh.Verts
base   = (vts.item 0).INativeObject__Handle

teapot_vec3 = UnmananagedArray2Struct base tri.numverts (dotNetClass "microsoft.xna.framework.vector3")

#23

This is not for a little thing. It can be a part of MXS_Octree (or some Quick_Verts) structure with another helpful methods


#24

You make make IArray at once…

It would also be better to pass an MXS point3 array instead of a mesh. In case you want to have a special list of points, not just all vertices.


#25

Yes, it definitely should be created inside same function

btw. Geometry3D library also contains a wrapper for Embree, but it uses old embree version so it doesn’t support node transform. All ray intersections happen in local space which is sad