Skip to content

Commit 73c4f0d

Browse files
committed
Add some haddocks and improve code readability
1 parent 1af2bef commit 73c4f0d

File tree

1 file changed

+53
-35
lines changed

1 file changed

+53
-35
lines changed

src/System/Hatrace.hs

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,10 @@ module System.Hatrace
6767

6868
import Conduit (foldlC)
6969
import Control.Arrow (second)
70-
import Control.Monad (when)
7170
import Control.Monad.IO.Class (MonadIO, liftIO)
7271
import Control.Monad.IO.Unlift (MonadUnliftIO)
73-
import Data.Bits ((.|.), (.&.), shiftL, shiftR)
72+
import Data.Bits ((.|.), shiftL, shiftR)
7473
import Data.ByteString (ByteString)
75-
import qualified Data.ByteString as BS
7674
import qualified Data.ByteString.Internal as BSI
7775
import Data.Conduit
7876
import qualified Data.Conduit.List as CL
@@ -998,15 +996,16 @@ getExePath pid = do
998996

999997

1000998
data FileWriteEvent
1001-
= FileOpen ByteString
999+
= FileOpen ByteString -- ^ name used to open the file
10021000
| FileWrite
10031001
| FileClose
10041002
| FileRename ByteString -- ^ new (target) name
10051003
deriving (Eq, Ord, Show)
10061004

1005+
-- | Uses raw trace events to produce more focused events aimed at analysing file writes.
1006+
-- Output events are accompanied by corresponding absolute file paths.
1007+
--
10071008
-- NOTES:
1008-
-- * the code doesn't register `open` syscalls for files opened as readonly,
1009-
-- at the same time this filter isn't applied for other syscalls (close, rename)
10101009
-- * only calls to `write` are currently used as a marker for writes and syscalls
10111010
-- `pwrite`, `writev`, `pwritev` are not taken into account
10121011
fileWritesConduit :: (MonadIO m) => ConduitT (CPid, TraceEvent) (FilePath, FileWriteEvent) m ()
@@ -1066,48 +1065,67 @@ data FileWriteBehavior
10661065
| Unexpected String
10671066
deriving (Eq, Ord, Show)
10681067

1068+
-- uses state machine implemented as recursive functions
10691069
analyzeWrites :: [FileWriteEvent] -> FileWriteBehavior
1070-
analyzeWrites events = checkOpen events
1070+
analyzeWrites es = checkOpen es
10711071
where
1072-
checkOpen [] = NoWrites
1073-
-- we could see a close syscall for a file opened in readonly mode
1074-
-- thus we just ignore it
1075-
checkOpen (FileClose:es) = checkOpen es
1076-
checkOpen (FileOpen _:es) = checkWrites es
1077-
checkOpen (e:_) = unexpected "FileOpen" e
1078-
checkWrites [] = Unexpected $ "FileClose was expected but not seen"
1079-
checkWrites (FileClose:es) = checkOpen es
1080-
checkWrites (FileWrite:es) = checkWrites' es
1081-
checkWrites (e: _) = unexpected "FileClose or FileWrite" e
1082-
checkWrites' [] = Unexpected $ "FileClose was expected but not seen"
1083-
checkWrites' (FileWrite:es) = checkWrites' es
1084-
checkWrites' (FileClose:es) = checkRename es
1085-
checkWrites' (e: _) = unexpected "FileClose or FileWrite" e
1086-
checkRename (FileRename path:es) =
1087-
case checkOpen es of
1088-
NoWrites ->
1089-
-- we write original path here which swapped
1090-
-- with oldpath in `atomicWritesSink`
1091-
AtomicWrite (T.unpack $ decodeUtf8OrError path)
1092-
other ->
1093-
other
1094-
checkRename es =
1095-
case checkOpen es of
1096-
NoWrites -> NonatomicWrite
1097-
other -> other
1098-
unexpected expected real =
1072+
checkOpen events =
1073+
case events of
1074+
[] -> NoWrites
1075+
-- we could see a `close` syscall for a pipe descriptor
1076+
-- with no `open` for it thus we just ignore it
1077+
FileClose : rest -> checkOpen rest
1078+
FileOpen _ : rest -> checkWrites rest
1079+
unexpected : _ -> unexpectedEvent "FileOpen" unexpected
1080+
checkWrites events =
1081+
case events of
1082+
[] -> Unexpected $ "FileClose was expected but not seen"
1083+
FileClose : rest -> checkOpen rest
1084+
FileWrite : rest -> checkAfterWrite rest
1085+
unexpected : _ -> unexpectedEvent "FileClose or FileWrite" unexpected
1086+
checkAfterWrite events =
1087+
case events of
1088+
[] -> Unexpected $ "FileClose was expected but not seen"
1089+
FileWrite : rest -> checkAfterWrite rest
1090+
FileClose : rest -> checkRename rest
1091+
unexpected : _ -> unexpectedEvent "FileClose or FileWrite" unexpected
1092+
-- when it happens that a path gets more than 1 sequence open-write-close
1093+
-- for it we need to check whether there was a `rename` after the 1st one
1094+
-- and then check the result of the next one and combine them accordingly
1095+
-- e.g. atomic + non-atomic -> non-atomic
1096+
checkRename events =
1097+
case events of
1098+
FileRename path : rest ->
1099+
case checkOpen rest of
1100+
NoWrites ->
1101+
-- we write original path here which swapped
1102+
-- with oldpath in `atomicWritesSink`
1103+
AtomicWrite (T.unpack $ decodeUtf8OrError path)
1104+
other ->
1105+
other
1106+
noRenames ->
1107+
case checkOpen noRenames of
1108+
NoWrites -> NonatomicWrite
1109+
other -> other
1110+
unexpectedEvent expected real =
10991111
Unexpected $ "expected " ++ expected ++ ", but " ++
11001112
show real ++ " was seen"
11011113

11021114
atomicWritesSink :: (MonadIO m) => ConduitT (CPid, TraceEvent) Void m (Map FilePath FileWriteBehavior)
11031115
atomicWritesSink =
1104-
extract <$> (fileWritesConduit .| foldlC collectWrite mempty)
1116+
extract <$> (fileWritesConduit .| foldlC collectWrite Map.empty)
11051117
where
1118+
collectWrite :: Map FilePath [FileWriteEvent] -> (FilePath, FileWriteEvent) -> Map FilePath [FileWriteEvent]
11061119
collectWrite m (fp, e) = Map.alter (Just . maybe [e] (e:)) fp m
1120+
extract :: Map FilePath [FileWriteEvent] -> Map FilePath FileWriteBehavior
11071121
extract m =
11081122
let (noRenames, renames) =
11091123
partitionEithers . map (analyzeWrites' . second reverse) $ Map.toList m
11101124
in Map.fromList noRenames <> Map.fromList (map (second AtomicWrite) renames)
1125+
-- this function (in addition to what `analyzeWrites` does) treats atomic writes
1126+
-- in a special way: those include a rename and we need to put atomic writes under
1127+
-- a path which is a target of a corresponding rename
1128+
-- so in the end we swap path in `AtomicWrite` and its corresponding map key
11111129
analyzeWrites' (src, es) = case analyzeWrites es of
11121130
AtomicWrite target -> Right (target, src)
11131131
other -> Left (src, other)

0 commit comments

Comments
 (0)