权利 我无法抵制这一限制,并写下了一种简单金属包装实施:
{-# language TypeSynonymInstances #-}
{-# language BangPatterns #-}
import Data.Bits
import Data.Word
data NukeTide = A | T | C | G deriving (Read, Show, Eq, Ord, Enum)
type UnpackedMotif = [NukeTide]
type PackageType = Word32
nukesInPackage = 16 :: Int
allSetMask = complement 0 :: PackageType
-- Be careful to have length of motif == nukesInPackage here!
packNukesToWord :: UnpackedMotif -> PackageType
packNukesToWord = packAt 0
where packAt _ [] = 0
packAt i (m:ml) = (b0 m .&. bit i)
.|. (b1 m .&. bit (i+1))
.|. packAt (i+2) ml
b0 A = 0
b0 T = allSetMask
b0 C = 0
b0 G = allSetMask
b1 A = 0
b1 T = 0
b1 C = allSetMask
b1 G = allSetMask
unpackNukesWord :: PackageType -> UnpackedMotif
unpackNukesWord = unpackNNukesFromWord nukesInPackage
unpackNNukesFromWord :: Int -> PackageType -> UnpackedMotif
unpackNNukesFromWord = unpackN
where unpackN 0 _ = []
unpackN i w = (nukeOf $ w .&. r2Mask):(unpackN (i-1) $ w`shiftR`2)
nukeOf bs
| bs == 0 = A
| bs == bit 0 = T
| bs == bit 1 = C
| otherwise = G
r2Mask = (bit 1 .|. bit 0) :: PackageType
data PackedMotif = PackedMotif { motifPackets::[PackageType]
, nukesInLastPack::Int }
-- note nukesInLastPack will never be zero; motifPackets must be [] to represent empty motifs.
packNukes :: UnpackedMotif -> PackedMotif
packNukes m = case remain of
[] -> PackedMotif [packNukesToWord takeN] (length takeN)
r -> prAppend (packNukesToWord takeN) (packNukes r)
where (takeN, remain) = splitAt nukesInPackage m
prAppend w (PackedMotif l i) = PackedMotif (w:l) i
unpackNukes :: PackedMotif -> UnpackedMotif
unpackNukes (PackedMotif l i) = unpack l i
where unpack [l] i = unpackNNukesFromWord i l
unpack (l:ls) i = unpackNukesWord l ++ unpack ls i
unpack [] _ = []
instance Show PackedMotif where
show = show . unpackNukes
class Nukes a where
pLength :: a -> Int
shiftLN1 :: a -> a
hammingDistance :: a -> a -> Int
motifs :: Int -> a -> [a]
instance Nukes PackageType where
pLength _ = nukesInPackage
shiftLN1 = (`shiftR`2)
hammingDistance !x !y = fromIntegral $ abt (x `xor` y)
where abt !b = bbt(b.&.a0Mask .|. ((b.&.a1Mask) `shiftR` 1))
bbt !b = sbt $ (b.&.r16Mask) + (b `shiftR` nukesInPackage)
sbt !b = (r2Mask .&. b) + (r2Mask .&. (b`shiftR`2))
+ (r2Mask .&. (b`shiftR`4)) + (r2Mask .&. (b`shiftR`6))
+ (r2Mask .&. (b`shiftR`8)) + (r2Mask .&. (b`shiftR`10))
+ (r2Mask .&. (b`shiftR`12)) + (r2Mask .&. (b`shiftR`14))
a0Mask = 0x55555555 :: PackageType
a1Mask = 0xAAAAAAAA :: PackageType
r16Mask = 0xFFFF :: PackageType
r2Mask = 0x3 :: PackageType
motifs 0 _ = []
motifs l x = x : motifs (l-1) (shiftLN1 x)
maskNukesBut :: Int -> PackageType -> PackageType
maskNukesBut i = ( ( allSetMask `shiftR` (2*(nukesInPackage - i)) ) .&.)
instance Nukes PackedMotif where
pLength (PackedMotif (x:xs) ix) = nukesInPackage * (length xs) + ix
pLength _ = 0
shiftLN1 ξ@(PackedMotif [] _) = ξ
shiftLN1 (PackedMotif [x] ix) | ix>1 = PackedMotif [x`shiftR`2] (ix-1)
| otherwise = PackedMotif [] nukesInPackage
shiftLN1 (PackedMotif (x:x :xs) ix)
= PackedMotif (( shiftLN1 x .|. pnext ):sxs) resLMod
where sxs = motifPackets $ shiftLN1 (PackedMotif (x :xs) ix)
pnext = shiftL (x .&.0x3) 30
resLMod = if ix > 1 then (ix-1) else nukesInPackage
hammingDistance xs ys = go 0 xs ys
where
go :: Int -> PackedMotif -> PackedMotif -> Int
go !acc (PackedMotif [x] ix) (PackedMotif [y] iy)
| ix > iy = acc + (hammingDistance y $ maskNukesBut iy x)
| otherwise = acc + (hammingDistance x $ maskNukesBut ix y)
go !acc (PackedMotif [x] ix) (PackedMotif (y:ys) iy)
= acc + (hammingDistance x $ maskNukesBut ix y)
go !acc (PackedMotif (x:xs) ix) (PackedMotif [y] iy)
= acc + (hammingDistance y $ maskNukesBut iy x)
go !acc (PackedMotif (x:xs) ix) (PackedMotif (y:ys) iy)
= go (acc + hammingDistance x y) (PackedMotif xs ix) (PackedMotif ys iy)
go !acc _ _ = acc
motifs l ξ
| l>0 = fShfts (min nukesInPackage $ pLength ξ + 1 - l) ξ >>= ct
| otherwise = []
where fShfts k χ | k > 0 = χ : fShfts (k-1) (shiftLN1 χ)
| otherwise = []
ct (PackedMotif ys iy) = case remain of
[] -> if (length takeN - 1) * nukesInPackage + iy >= l
then [PackedMotif takeN lMod] else []
_ -> PackedMotif takeN lMod : ct(PackedMotif (tail ys) iy)
where (takeN, remain) = splitAt lQuot ys
(lQuot,lMod) = case l `quotRem` nukesInPackage of
(i,0) -> (i, nukesInPackage)
(i,m) -> (i+1, m)
可使用<代码>。 Un PackedMotif = [NukeTide] s with the PackNukes function, e.g.
*BioNuke0> motifs 23 $ packNukes $ take 27 $ cycle [A,T,G,C,A]
[[A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G],[T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C],[G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A],[C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A],[A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T,G,C,A,A,T]]
*BioNuke0> hammingDistance (packNukes [A,T,G,C,A,A,T,G]) (packNukes [A,T,C,C,A,T,G])
3
*BioNuke0> map (hammingDistance (packNukes $ take 52 $ cycle [A,T,C,C,A,T,G])) (motifs 52 $ packNukes $ take 523 $ cycle [A,T,C,C,A,T,G])
[0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44,38,52,0,52,36,45,44]
I haven t compared the performance to the original version yet, but it should be quite a bit faster than any algebraic-datatype implementation. Plus, it readily offers a space-efficient storage format.