Skip to content

Commit 20b05af

Browse files
voorloopnul authored and Williangalvani committed
core: create Major Tom
1 parent afa82ab commit 20b05af

File tree

8 files changed

+325
-0
lines changed

8 files changed

+325
-0
lines changed

core/services/install-services.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ SERVICES=(
3535
ping
3636
versionchooser
3737
wifi
38+
major_tom
3839
)
3940

4041
# We need to install loguru, appdirs and pydantic since they may be used inside setup.py

core/services/major_tom/main.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#! /usr/bin/env python3
2+
import copy
3+
import datetime
4+
import sys
5+
import time
6+
import uuid
7+
from dataclasses import asdict, dataclass
8+
from typing import Any, Dict, Optional
9+
from zoneinfo import ZoneInfo
10+
11+
import loguru
12+
from commonwealth.utils.general import (
13+
local_hardware_identifier,
14+
local_unique_identifier,
15+
)
16+
17+
from src.core import DefaultPayload, TelemetryEngine, get_latency
18+
from src.metrics import Metrics
19+
from src.typedefs import ExtensionInfo, VersionInfo
20+
21+
LOG_SESSION_UUID = str(uuid.uuid4())
22+
23+
SERVICE_NAME = "major_tom"
24+
LOG_FOLDER_PATH = f"/var/logs/blueos/services/{SERVICE_NAME}/"
25+
26+
TELEMETRY_ENDPOINT = "https://telemetry.blueos.cloud/api/v1/anonymous/"
27+
S3_TELEMETRY_ENDPOINT = "https://telemetry.blueos.cloud/api/v1/anonymous/s3/"
28+
29+
30+
# pylint: disable=too-many-instance-attributes
31+
@dataclass
class AnonymousTelemetryRecord:
    """One anonymous telemetry sample.

    The field order is part of the contract: instances may be built positionally.
    """

    # Reading of the boot-time clock when the sample was taken
    uptime: float
    # Latency to the best speed-test server; negative when it could not be measured
    latency: float
    # Total and used memory
    memory_size: int
    memory_usage: int
    # Total and used disk space
    disk_size: int
    disk_usage: int
    # Installed extensions, or None when they could not be queried
    extensions: Optional[list[ExtensionInfo]]
    # Running BlueOS version, or None when it could not be queried
    blueos_version: Optional[VersionInfo]
    # Time spent collecting the values above
    probe_time: float

    def json(self) -> dict[str, Any]:
        """Return the record, including nested dataclasses, as a plain dictionary."""
        as_dictionary: dict[str, Any] = asdict(self)
        return as_dictionary
45+
46+
47+
def compose_default_record(order: int) -> Dict[str, Any]:
    """Build one telemetry payload, ready to be posted or buffered.

    Args:
        order: Sequence number of this record within the current log session.

    Returns:
        The ``DefaultPayload`` envelope as a plain dictionary, with the
        ``AnonymousTelemetryRecord`` values stored under the ``data`` key.
    """
    date_time_utc = datetime.datetime.now(ZoneInfo("UTC")).isoformat()
    payload = DefaultPayload(
        log_session_uuid=LOG_SESSION_UUID,
        order=order,
        timestamp=date_time_utc,
        hardware_id=local_hardware_identifier(),
        blueos_id=local_unique_identifier(),
        data={},
    )

    # Durations are taken from the monotonic clock: the wall clock may be stepped
    # (e.g. by a time synchronisation) while probing, which would yield a bogus or
    # even negative probe time.
    start_probing = time.monotonic()
    metrics = Metrics()
    # Keyword arguments keep the nine values attached to their field names; they are
    # still evaluated in field order.
    record = AnonymousTelemetryRecord(
        uptime=time.clock_gettime(time.CLOCK_BOOTTIME),
        latency=get_latency(),
        memory_size=metrics.memory.total,
        memory_usage=metrics.memory.used,
        disk_size=metrics.disk.total,
        disk_usage=metrics.disk.used,
        extensions=metrics.installed_extensions,
        blueos_version=metrics.installed_version,
        probe_time=0.0,  # placeholder, filled in once every probe has run
    )
    record.probe_time = time.monotonic() - start_probing
    payload.data = record.json()
    return payload.json()
74+
75+
76+
if __name__ == "__main__":
    # Two loggers live in this process: the regular console logger and a second one
    # used only as the on-disk buffer for undelivered telemetry. Deep-copying the
    # handler-less logger is the documented way to obtain an independent one, see
    # https://loguru.readthedocs.io/en/latest/resources/recipes.html#creating-independent-loggers-with-separate-set-of-handlers
    loguru.logger.remove()
    log_buffer = copy.deepcopy(loguru.logger)
    loguru.logger.add(sys.stdout, format="{time} {level} {message}")

    five_minutes = 60 * 5
    one_mebibyte = 1024 * 1024

    engine = TelemetryEngine(
        # Tags the telemetry type; non-anonymous telemetry may be added in the future
        label="anonymous",
        endpoint=TELEMETRY_ENDPOINT,
        s3_endpoint=S3_TELEMETRY_ENDPOINT,
        create_record=compose_default_record,
        interval=five_minutes,
        max_file_size=one_mebibyte,
        max_file_retention=10,
        buffer_folder=LOG_FOLDER_PATH,
        log_buffer=log_buffer,
    )
    # Calling the engine starts its collection loop
    engine()

core/services/major_tom/setup.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python3
2+
3+
import setuptools
4+
5+
# Runtime dependencies of the service, pinned to exact versions
REQUIREMENTS = [
    "psutil==5.9.5",
    "requests==2.31.0",
    "speedtest-cli==2.1.3",
    "Flask==2.3.2",
    "loguru==0.7.0",
]

setuptools.setup(
    name="Major Tom",
    version="0.1.0",
    description="Sends telemetry back to Ground Control",
    license="MIT",
    install_requires=REQUIREMENTS,
)

core/services/major_tom/src/__init__.py

Whitespace-only changes.

core/services/major_tom/src/core.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import gzip
2+
import json
3+
import os
4+
import shutil
5+
import time
6+
from dataclasses import asdict, dataclass
7+
from typing import Any, Callable, Dict, List
8+
9+
import loguru
10+
import requests
11+
import speedtest
12+
from loguru import logger
13+
14+
15+
def formatter(record: "loguru.Record") -> str:
    """Format callback that writes each log message as one JSON-encoded line.

    The JSON text is stored in the record's ``extra`` mapping and the returned value
    is the *template* that refers to it, not the final log line.
    """
    extra = record["extra"]
    extra["serialized"] = json.dumps(record["message"])
    return "{extra[serialized]}\n"
19+
20+
21+
def is_online() -> bool:
    """Tell whether a latency could be measured, i.e. ``get_latency()`` is positive."""
    measured_latency = get_latency()
    return measured_latency > 0
23+
24+
25+
def get_latency() -> float:
    """Return the latency reported for the best speed-test server.

    Best effort: any failure while talking to the speed-test service is swallowed
    and reported as ``-1.0``.
    """
    try:
        client = speedtest.Speedtest()
        no_filter: List[str] = []
        client.get_servers(no_filter)
        return float(client.get_best_server()["latency"])
    except Exception:
        return -1.0
35+
36+
37+
@dataclass
class DefaultPayload:
    """Envelope shared by every telemetry record."""

    # Identifier of the log session the record belongs to
    log_session_uuid: str
    # Sequence number of the record
    order: int
    # Creation time of the record, as text
    timestamp: str
    # Identifiers of the hardware and of the BlueOS installation
    hardware_id: str
    blueos_id: str
    # Record-specific content
    data: dict[str, Any]

    def json(self) -> dict[str, Any]:
        """Return the payload as a plain dictionary (nested values are copied)."""
        as_dictionary: dict[str, Any] = asdict(self)
        return as_dictionary
48+
49+
50+
class TelemetryEngine:
    """Collect a telemetry record at a fixed interval and deliver it to the telemetry API.

    A record that cannot be posted is appended to a local buffer file, which the
    buffer logger rotates into ``.gz`` archives. Whenever a record is posted
    successfully, the pending archives and the current buffer are uploaded.
    """

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        label: str,
        endpoint: str,
        s3_endpoint: str,
        create_record: Callable[[Any], Any],
        interval: float,
        max_file_size: int,
        max_file_retention: int,
        buffer_folder: str,
        log_buffer: loguru._logger.Logger,  # type: ignore
    ):
        """
        Args:
            label: Tag for the telemetry type; prefixes the buffer file name.
            endpoint: URL records are posted to.
            s3_endpoint: URL that hands out the pre-signed upload target for archives.
            create_record: Called with the record sequence number, returns the record.
            interval: Seconds to sleep between two records.
            max_file_size: Size at which the buffer file is rotated.
            max_file_retention: Retention setting handed to the buffer logger.
            buffer_folder: Folder holding the buffer file and its archives.
            log_buffer: Independent logger that only writes the buffer file.
        """
        # os.path.join avoids a doubled separator when buffer_folder ends with "/" and
        # yields the same spelling that process_buffered_records builds for folder entries.
        self.buffer_file = os.path.join(buffer_folder, f"{label}_usage.log")
        self.buffer_folder = buffer_folder

        self.telemetry_endpoint = endpoint
        self.telemetry_s3_endpoint = s3_endpoint
        self.create_record = create_record
        self.interval = interval

        self.log_buffer = log_buffer
        self.log_buffer.add(
            self.buffer_file,
            rotation=max_file_size,
            retention=max_file_retention,
            format=formatter,
            compression="gz",
        )

    def __call__(self) -> None:
        """Run the collection loop forever; the first record has order 1."""
        order = 0
        while True:
            order += 1
            record = self.create_record(order)
            # A successful post proves connectivity, so flush whatever was buffered
            if self.save(record) == "online":
                self.process_buffered_records()
            time.sleep(self.interval)

    def upload_file(self, file: str) -> bool:
        """
        Request a pre-signed URL from the telemetry API and upload a local archive to it.

        Returns:
            True when the upload was answered with status 204, False on any other
            status or on any error (errors are logged, never raised).
        """
        logger.info(f"uploading file... {file}")
        try:
            response = requests.get(self.telemetry_s3_endpoint, timeout=5).json()
            with open(file, "rb") as fh:
                files = {"file": (file, fh)}
                r = requests.post(response["url"], data=response["fields"], files=files, timeout=300)
                if r.status_code == 204:
                    logger.info("[Success!]")
                    return True
        except Exception as error:
            logger.info("Ground Control to Major Tom. Your circuit's dead, there's something wrong.")
            logger.error(f"error upload log file: {error}")

        return False

    def process_buffered_records(self) -> None:
        """
        Upload every archive found in the buffer folder, deleting those that went through.
        If the current buffer file holds records that were not rotated into an archive
        yet, it is archived and uploaded as well.
        """
        # os.listdir returns a snapshot, so an archive created below is not revisited
        # during this pass.
        for file in os.listdir(self.buffer_folder):
            file_path = os.path.join(self.buffer_folder, file)

            # Upload regular archive
            if file_path.endswith(".log.gz"):
                if self.upload_file(file_path):
                    os.remove(file_path)
                continue

            # Archive current buffer and upload it. Only this engine's own buffer
            # qualifies: an unrelated non-empty ".log" file must not trigger archiving.
            if file_path == self.buffer_file and os.path.getsize(file_path):
                self._archive_and_upload_buffer()

    def _archive_and_upload_buffer(self) -> None:
        """Compress the buffer file into a timestamped archive, empty it and upload the archive."""
        timestamp = int(time.time())
        # Only the trailing extension is swapped; str.replace would also rewrite a
        # ".log" that appears earlier in the path.
        root, _extension = os.path.splitext(self.buffer_file)
        tmp_name = f"{root}.{timestamp}.log.gz"
        with open(self.buffer_file, "rb") as f_in, gzip.open(tmp_name, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

        # The archive now owns every buffered record, so the buffer is emptied right
        # away. If the upload below fails, the archive stays in the folder and is
        # retried by the next pass; the records are neither lost nor sent twice.
        with open(self.buffer_file, "w", encoding="utf-8"):
            pass

        if self.upload_file(tmp_name):
            os.remove(tmp_name)

    def save(self, record: Dict[str, Any]) -> str:
        """
        Try to POST the telemetry payload, if it fails for any reason, we buffer it locally.

        Returns:
            "online" when the API answered with status 201, otherwise "offline".
        """
        try:
            r = requests.post(self.telemetry_endpoint, json=record, timeout=5)
            if r.status_code == 201:
                return "online"
        except Exception as error:
            logger.info("Ground Control to Major Tom. Your circuit's dead, there's something wrong.")
            logger.error(f"error posting telemetry to Ground Control: {error}")

        self.log_buffer.info(record)
        return "offline"
core/services/major_tom/src/metrics.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import http
2+
from functools import cached_property
3+
from typing import List, Optional
4+
5+
import psutil
6+
import requests
7+
from loguru import logger
8+
9+
from src.typedefs import ExtensionInfo, VersionInfo
10+
11+
12+
class Metrics:
    """System metrics for one telemetry sample.

    Every metric is a ``cached_property``: it is probed on first access and the
    result is reused for the lifetime of the instance.
    """

    @cached_property
    def installed_extensions(self) -> Optional[List[ExtensionInfo]]:
        """Extensions reported by the local ``kraken`` endpoint.

        Returns:
            One ``ExtensionInfo`` per reported extension, an empty list when the
            endpoint answers with a non-OK status, or ``None`` when the request or
            the parsing of its answer fails (the error is logged).
        """
        try:
            req = requests.get("http://localhost/kraken/v1.0/installed_extensions", timeout=3)
            # HTTPStatus is exported by the http package itself. http.client is a
            # submodule that a bare ``import http`` does not load, so reaching it
            # only works if some other module imported it first.
            if req.status_code == http.HTTPStatus.OK:
                return [ExtensionInfo(identifier=rec["identifier"], tag=rec["tag"]) for rec in req.json()]
        except Exception as error:
            logger.error(f"Error getting installed extensions: {error}")
            return None
        return []

    @cached_property
    def disk(self) -> psutil._common.sdiskusage:
        """Usage of the filesystem mounted at ``/``, as returned by psutil."""
        return psutil.disk_usage("/")

    @cached_property
    def memory(self) -> psutil._pslinux.svmem:
        """Virtual memory statistics, as returned by psutil."""
        return psutil.virtual_memory()

    @cached_property
    def installed_version(self) -> Optional[VersionInfo]:
        """Version reported by the local ``version-chooser`` endpoint.

        Returns:
            The reported version, or ``None`` when the endpoint answers with a
            non-OK status or the request/parsing fails (the error is logged).
        """
        try:
            req = requests.get("http://localhost/version-chooser/v1.0/version/current", timeout=3)
            if req.status_code == http.HTTPStatus.OK:
                data = req.json()
                return VersionInfo(
                    repository=data["repository"],
                    tag=data["tag"],
                    last_modified=data["last_modified"],
                    sha=data["sha"],
                    architecture=data["architecture"],
                )

        except Exception as error:
            logger.error(f"Error getting version info: {error}")
            return None
        return None
core/services/major_tom/src/typedefs.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from dataclasses import dataclass
2+
3+
4+
@dataclass
class ExtensionInfo:
    """Identity of one installed extension."""

    # Identifier of the extension
    identifier: str
    # Tag of the installed extension
    tag: str
8+
9+
10+
# Without the decorator the class only carries annotations: it has no generated
# __init__, so building it with keyword arguments raises TypeError, and it cannot be
# converted by dataclasses.asdict together with the record that embeds it.
@dataclass
class VersionInfo:
    """Description of one BlueOS version."""

    # Repository the version comes from
    repository: str
    # Tag of the version
    tag: str
    # Last modification time, as text
    last_modified: str
    # Digest of the version
    sha: str
    # Architecture of the version
    architecture: str

core/start-blueos-core

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ SERVICES=(
9393
'nginx',"nice -18 nginx -g \"daemon off;\" -c $TOOLS_PATH/nginx/nginx.conf"
9494
'log_zipper',"nice -20 $SERVICES_PATH/log_zipper/main.py '/shortcuts/system_logs/**/*.log' --max-age-minutes 60"
9595
'bag_of_holding',"$SERVICES_PATH/bag_of_holding/main.py"
96+
'major_tom',"$SERVICES_PATH/major_tom/main.py"
9697
)
9798

9899
tmux -f /etc/tmux.conf start-server

0 commit comments

Comments
 (0)