@@ -67,12 +67,10 @@ module System.Hatrace
67
67
68
68
import Conduit (foldlC )
69
69
import Control.Arrow (second )
70
- import Control.Monad (when )
71
70
import Control.Monad.IO.Class (MonadIO , liftIO )
72
71
import Control.Monad.IO.Unlift (MonadUnliftIO )
73
- import Data.Bits ((.|.) , (.&.) , shiftL , shiftR )
72
+ import Data.Bits ((.|.) , shiftL , shiftR )
74
73
import Data.ByteString (ByteString )
75
- import qualified Data.ByteString as BS
76
74
import qualified Data.ByteString.Internal as BSI
77
75
import Data.Conduit
78
76
import qualified Data.Conduit.List as CL
@@ -998,15 +996,16 @@ getExePath pid = do
998
996
999
997
1000
998
data FileWriteEvent
1001
- = FileOpen ByteString
999
+ = FileOpen ByteString -- ^ name used to open the file
1002
1000
| FileWrite
1003
1001
| FileClose
1004
1002
| FileRename ByteString -- ^ new (target) name
1005
1003
deriving (Eq , Ord , Show )
1006
1004
1005
+ -- | Uses raw trace events to produce more focused events aimed at analysing file writes.
1006
+ -- Output events are accompanied by corresponding absolute file paths.
1007
+ --
1007
1008
-- NOTES:
1008
- -- * the code doesn't register `open` syscalls for files opened as readonly,
1009
- -- at the same time this filter isn't applied for other syscalls (close, rename)
1010
1009
-- * only calls to `write` are currently used as a marker for writes and syscalls
1011
1010
-- `pwrite`, `writev`, `pwritev` are not taken into account
1012
1011
fileWritesConduit :: (MonadIO m ) => ConduitT (CPid , TraceEvent ) (FilePath , FileWriteEvent ) m ()
@@ -1066,48 +1065,67 @@ data FileWriteBehavior
1066
1065
| Unexpected String
1067
1066
deriving (Eq , Ord , Show )
1068
1067
1068
+ -- uses state machine implemented as recursive functions
1069
1069
analyzeWrites :: [FileWriteEvent ] -> FileWriteBehavior
1070
- analyzeWrites events = checkOpen events
1070
+ analyzeWrites es = checkOpen es
1071
1071
where
1072
- checkOpen [] = NoWrites
1073
- -- we could see a close syscall for a file opened in readonly mode
1074
- -- thus we just ignore it
1075
- checkOpen (FileClose : es) = checkOpen es
1076
- checkOpen (FileOpen _: es) = checkWrites es
1077
- checkOpen (e: _) = unexpected " FileOpen" e
1078
- checkWrites [] = Unexpected $ " FileClose was expected but not seen"
1079
- checkWrites (FileClose : es) = checkOpen es
1080
- checkWrites (FileWrite : es) = checkWrites' es
1081
- checkWrites (e: _) = unexpected " FileClose or FileWrite" e
1082
- checkWrites' [] = Unexpected $ " FileClose was expected but not seen"
1083
- checkWrites' (FileWrite : es) = checkWrites' es
1084
- checkWrites' (FileClose : es) = checkRename es
1085
- checkWrites' (e: _) = unexpected " FileClose or FileWrite" e
1086
- checkRename (FileRename path: es) =
1087
- case checkOpen es of
1088
- NoWrites ->
1089
- -- we write original path here which swapped
1090
- -- with oldpath in `atomicWritesSink`
1091
- AtomicWrite (T. unpack $ decodeUtf8OrError path)
1092
- other ->
1093
- other
1094
- checkRename es =
1095
- case checkOpen es of
1096
- NoWrites -> NonatomicWrite
1097
- other -> other
1098
- unexpected expected real =
1072
+ checkOpen events =
1073
+ case events of
1074
+ [] -> NoWrites
1075
+ -- we could see a `close` syscall for a pipe descriptor
1076
+ -- with no `open` for it thus we just ignore it
1077
+ FileClose : rest -> checkOpen rest
1078
+ FileOpen _ : rest -> checkWrites rest
1079
+ unexpected : _ -> unexpectedEvent " FileOpen" unexpected
1080
+ checkWrites events =
1081
+ case events of
1082
+ [] -> Unexpected $ " FileClose was expected but not seen"
1083
+ FileClose : rest -> checkOpen rest
1084
+ FileWrite : rest -> checkAfterWrite rest
1085
+ unexpected : _ -> unexpectedEvent " FileClose or FileWrite" unexpected
1086
+ checkAfterWrite events =
1087
+ case events of
1088
+ [] -> Unexpected $ " FileClose was expected but not seen"
1089
+ FileWrite : rest -> checkAfterWrite rest
1090
+ FileClose : rest -> checkRename rest
1091
+ unexpected : _ -> unexpectedEvent " FileClose or FileWrite" unexpected
1092
+ -- when it happens that a path gets more than 1 sequence open-write-close
1093
+ -- for it we need to check whether there was a `rename` after the 1st one
1094
+ -- and then check the result of the next one and combine them accordingly
1095
+ -- e.g. atomic + non-atomic -> non-atomic
1096
+ checkRename events =
1097
+ case events of
1098
+ FileRename path : rest ->
1099
+ case checkOpen rest of
1100
+ NoWrites ->
1101
+ -- we write original path here which swapped
1102
+ -- with oldpath in `atomicWritesSink`
1103
+ AtomicWrite (T. unpack $ decodeUtf8OrError path)
1104
+ other ->
1105
+ other
1106
+ noRenames ->
1107
+ case checkOpen noRenames of
1108
+ NoWrites -> NonatomicWrite
1109
+ other -> other
1110
+ unexpectedEvent expected real =
1099
1111
Unexpected $ " expected " ++ expected ++ " , but " ++
1100
1112
show real ++ " was seen"
1101
1113
1102
1114
atomicWritesSink :: (MonadIO m ) => ConduitT (CPid , TraceEvent ) Void m (Map FilePath FileWriteBehavior )
1103
1115
atomicWritesSink =
1104
- extract <$> (fileWritesConduit .| foldlC collectWrite mempty )
1116
+ extract <$> (fileWritesConduit .| foldlC collectWrite Map. empty )
1105
1117
where
1118
+ collectWrite :: Map FilePath [FileWriteEvent ] -> (FilePath , FileWriteEvent ) -> Map FilePath [FileWriteEvent ]
1106
1119
collectWrite m (fp, e) = Map. alter (Just . maybe [e] (e: )) fp m
1120
+ extract :: Map FilePath [FileWriteEvent ] -> Map FilePath FileWriteBehavior
1107
1121
extract m =
1108
1122
let (noRenames, renames) =
1109
1123
partitionEithers . map (analyzeWrites' . second reverse ) $ Map. toList m
1110
1124
in Map. fromList noRenames <> Map. fromList (map (second AtomicWrite ) renames)
1125
+ -- this function (in addition to what `analyzeWrites` does) treats atomic writes
1126
+ -- in a special way: those include a rename and we need to put atomic writes under
1127
+ -- a path which is a target of a corresponding rename
1128
+ -- so in the end we swap path in `AtomicWrite` and its corresponding map key
1111
1129
analyzeWrites' (src, es) = case analyzeWrites es of
1112
1130
AtomicWrite target -> Right (target, src)
1113
1131
other -> Left (src, other)
0 commit comments