{-# LANGUAGE Haskell2010 #-} {-# LANGUAGE BangPatterns #-} import Control.Arrow import Data.Bits import Data.Vector.UnBoxed ((!)) import Data.Word import System.Environment (getArgs) import qualified Codec.Picture as P import qualified Data.ByteString as B import qualified Data.Vector.UnBoxed as V
我试着移植Ken Perlin’s improved noise
到Haskell,但我不完全确定我的方法是正确的.主要部分
是一个应该很好地推广到更高和更低维度的东西,但是
这是以后的事情:
perlin3 :: (Ord a,Num a,RealFrac a,V.UnBox a) => Permutation -> (a,a,a) -> a perlin3 p (!x',!y',!z') = let (!xX,!x) = actuallyProperFraction x' (!yY,!y) = actuallyProperFraction y' (!zZ,!z) = actuallyProperFraction z' !u = fade x !v = fade y !w = fade z !h = xX !a = next p h + yY !b = next p (h+1) + yY !aa = next p a + zZ !ab = next p (a+1) + zZ !ba = next p b + zZ !bb = next p (b+1) + zZ !aaa = next p aa !aab = next p (aa+1) !aba = next p ab !abb = next p (ab+1) !baa = next p ba !bab = next p (ba+1) !bba = next p bb !bbb = next p (bb+1) in lerp w (lerp v (lerp u (grad aaa (x,y,z)) (grad baa (x-1,z))) (lerp u (grad aba (x,y-1,z)) (grad bba (x-1,z)))) (lerp v (lerp u (grad aab (x,z-1)) (grad bab (x-1,z-1))) (lerp u (grad abb (x,z-1)) (grad bbb (x-1,z-1))))
这当然伴随着perlin3中提到的一些功能
功能,我希望它们尽可能高效:
fade :: (Ord a,Num a) => a -> a fade !t | 0 <= t,t <= 1 = t * t * t * (t * (t * 6 - 15) + 10) lerp :: (Ord a,Num a) => a -> a -> a -> a lerp !t !a !b | 0 <= t,t <= 1 = a + t * (b - a) grad :: (Bits hash,Integral hash,V.UnBox a) => hash -> (a,a) -> a grad !hash (!x,!y,!z) = dot3 (vks `V.unsafeIndex` fromIntegral (hash .&. 15)) (x,z) where vks = V.fromList [ (1,1,0),(-1,(1,-1,1),-1),(0,-1) ] dot3 :: Num a => (a,a) -> (a,a) -> a dot3 (!x0,!y0,!z0) (!x1,!y1,!z1) = x0 * x1 + y0 * y1 + z0 * z1 -- Unlike `properFraction`,`actuallyProperFraction` rounds as intended. actuallyProperFraction :: (RealFrac a,Integral b) => a -> (b,a) actuallyProperFraction x = let (ipart,fpart) = properFraction x r = if x >= 0 then (ipart,fpart) else (ipart-1,1+fpart) in r
对于排列组,我只是复制了他网站上使用的Perlin:
newtype Permutation = Permutation (V.Vector Word8) mkPermutation :: [Word8] -> Permutation mkPermutation xs | length xs >= 256 = Permutation . V.fromList $xs permutation :: Permutation permutation = mkPermutation [151,160,137,91,90,15,131,13,201,95,96,53,194,233,7,225,140,36,103,30,69,142,8,99,37,240,21,10,23,190,6,148,247,120,234,75,26,197,62,94,252,219,203,117,35,11,32,57,177,33,88,237,149,56,87,174,20,125,136,171,168,68,175,74,165,71,134,139,48,27,166,77,146,158,231,83,111,229,122,60,211,133,230,220,105,92,41,55,46,245,40,244,102,143,54,65,25,63,161,216,80,73,209,76,132,187,208,89,18,169,200,196,135,130,116,188,159,86,164,100,109,198,173,186,3,64,52,217,226,250,124,123,5,202,38,147,118,126,255,82,85,212,207,206,59,227,47,16,58,17,182,189,28,42,223,183,170,213,119,248,152,2,44,154,163,70,221,153,101,155,167,43,172,9,129,22,39,253,19,98,108,110,79,113,224,232,178,185,112,104,218,246,97,228,251,34,242,193,238,210,144,12,191,179,162,241,81,51,145,235,249,14,239,107,49,192,214,31,181,199,106,157,184,84,204,176,115,121,50,45,127,4,150,254,138,236,205,93,222,114,67,29,24,72,243,141,128,195,78,66,215,61,156,180 ] next :: Permutation -> Word8 -> Word8 next (Permutation !v) !idx' = v `V.unsafeIndex` (fromIntegral $idx' .&. 0xFF)
所有这些都与JuicyPixels捆绑在一起:
main = do [target] <- getArgs let image = P.generateImage pixelRenderer 512 512 P.writePng target image where pixelRenderer,pixelRenderer' :: Int -> Int -> Word8 pixelRenderer !x !y = floor $((perlin3 permutation ((fromIntegral x - 256) / 32,(fromIntegral y - 256) / 32,0 :: Double))+1)/2 * 128 -- This code is much more readable,but also much slower. pixelRenderer' x y = (\w -> floor $((w+1)/2 * 128)) -- w should be in [-1,+1] . perlin3 permutation . (\(x,z) -> ((x-256)/32,(y-256)/32,(z-256)/32)) $(fromIntegral x,fromIntegral y,0 :: Double)
我的问题是perlin3对我来说似乎很慢.如果我对它进行配置,则为pixelRenderer
也有很多时间,但我现在会忽略它.我不知道
如何优化perlin3.我试图暗示GHC的爆炸模式,削减
执行时间减半,这样很好.明确专业化和内联
ghc -O几乎没有帮助. perlin3应该是这么慢吗?
更新:此问题的早期版本提到了我的代码中的错误.这个问题已经解决了;事实证明我的旧版本的ActualProperFraction是错误的.它隐含地将浮点数的整数部分舍入到Word8,然后从浮点数中减去它以得到小数部分.由于Word8只能取0到255之间的值,因此对于该范围之外的数字(包括负数),这将无法正常工作.
解决方法
有两种用于衡量性能的有用工具:分析和代码转储.我在perlin3中添加了一个SCC注释,以便它显示在配置文件中.然后我用gcc -O2 -fforce-recomp -ddump-simpl -prof -auto编译. -ddump-simpl标志打印简化代码.
分析:在我的计算机上,运行程序需要0.60秒,根据配置文件,在perlin3中花费大约20%的执行时间(0.12秒).请注意,我的个人资料信息的精确度约为/ -3%.
简化器输出:简化器产生相当干净的代码. perlin3内联到pixelRenderer中,因此这是您想要查看的输出的一部分.大多数代码包括未装箱的数组读取和未装箱的算术.为了提高性能,我们想要消除一些这样的算法.
一个简单的改变是消除对SomeFraction的运行时检查(这不会出现在您的问题中,但是您上传的代码的一部分).这会将程序的执行时间减少到0.56秒.
-- someFraction t | 0 <= t,t < 1 = SomeFraction t someFraction t = SomeFraction t
接下来,有几个数组查找显示在简化器中,如下所示:
case GHC.Prim.indexWord8Array# ipv3_s23a (GHC.Prim.+# ipv1_s21N (GHC.Prim.word2Int# (GHC.Prim.and# (GHC.Prim.narrow8Word# (GHC.Prim.plusWord# ipv5_s256 (__word 1))) (__word 255))))
原始操作narrow8Word#用于从Int到Word8的强制转换.我们可以通过在下一个定义中使用Int而不是Word8来消除这种强制.
next :: Permutation -> Int -> Int next (Permutation !v) !idx' = fromIntegral $v `V.unsafeIndex` (fromIntegral idx' .&. 0xFF)
这会将程序的执行时间减少到0.54秒.考虑到在perlin3上花费的时间,执行时间(大致)从0.12秒下降到0.06秒.虽然很难衡量其余时间的去向,但很可能会在剩余的算术和数组访问中进行分析.