Initial commit

This commit is contained in:
core 2024-03-11 20:54:28 +01:00
commit 4885497069
7 changed files with 28575 additions and 0 deletions

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
FROM python:3
WORKDIR /opt/bt_monitor_server
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
#COPY bt_monitor_server.py .
#COPY training_data.csv .
CMD [ "python", "./bt_monitor_server.py" ]

95
analysis.py Normal file
View File

@ -0,0 +1,95 @@
from pathlib import Path
import pandas as pd
import numpy as np
from copy import copy
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
def load_measurements(csv_file: Path):
def cleanup_column_name(col_name: str):
clean_name = col_name.replace('#', '').strip()
if clean_name == 'room':
return 'tracker'
return clean_name
df = pd.read_csv(str(csv_file))
# String cleanup in column names and room names
df = df.rename(columns=cleanup_column_name)
df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
df['tracker'] = df['tracker'].astype("category")
df['real_room'] = df['real_room'].astype("category")
return df
FAR_AWAY_FEATURE_VALUE = 1
def get_feature_value(rssi, tx_power):
MIN_RSSI = -90
MAX_TRANSFORMED_RSSI = 40
v = tx_power - rssi - MAX_TRANSFORMED_RSSI
if v < 0:
v = 0
return v / (-MIN_RSSI)
def make_training_data(df: pd.DataFrame, device_to_map):
idx_to_tracker = dict(enumerate(df['tracker'].cat.categories ))
tracker_to_idx = {v: k for k, v in idx_to_tracker.items()}
idx_to_room = dict(enumerate(df['real_room'].cat.categories ))
room_to_idx = {v: k for k, v in idx_to_room.items()}
last_real_room = None
start_time = None
current_feature = [FAR_AWAY_FEATURE_VALUE] * len(idx_to_tracker)
features = []
labels = []
# Feature vectors - rssi column for each room
for i, row in df.iterrows():
time, device, tracker, rssi, tx_power, real_room = row
if device != device_to_map:
continue
if last_real_room != real_room:
start_time = time
last_real_room = real_room
tracker_idx = tracker_to_idx[tracker]
current_feature[tracker_idx] = get_feature_value(rssi, tx_power)
if time - start_time > 20:
features.append(copy(current_feature))
labels.append(room_to_idx[real_room])
return np.array(features), np.array(labels)
def train(features, labels, classes):
clf = svm.SVC(kernel='rbf')
print("Training")
scores = cross_val_score(clf, features, labels, cv=5)
print(scores)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))
X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=0)
clf.fit(X_train, y_train)
cm = confusion_matrix(clf.predict(X_test), y_test)
print(cm)
print(classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp.plot()
plt.show()
if __name__ == "__main__":
csv_path = Path("/home/martin/code/ansible/roles/bluetooth-monitor/other/collected.csv")
df = load_measurements(csv_path)
features, labels = make_training_data(df, "martins_apple_watch")
print(np.unique(labels))
print(features.shape, labels.shape)
train(features, labels, list(df['real_room'].dtype.categories))

331
bt_monitor_analyze.ipynb Normal file

File diff suppressed because one or more lines are too long

269
bt_monitor_server.py Executable file
View File

@ -0,0 +1,269 @@
#!/usr/bin/env python3
import aiomqtt
import json
import asyncio
from time import time
from pathlib import Path
from collections import namedtuple, defaultdict
from typing import Dict, Optional, List
from Crypto.Cipher import AES
import pandas as pd
import numpy as np
from copy import copy
import logging
from sklearn import svm
from sklearn.model_selection import cross_val_score
logging.basicConfig(level=logging.INFO)
BtleMeasurement = namedtuple("BtleMeasurement", ["time", "tracker", "address", "rssi", "tx_power"])
BtleDeviceMeasurement = namedtuple("BtleDeviceMeasurement", ["time", "device", "tracker", "rssi", "tx_power"])
MqttInfo = namedtuple("MqttInfo", ["server", "username", "password"])
# ------------------------------------------------------- DECODING -------------------------------------------------------------------------
class DeviceDecoder:
"""Decode bluetooth addresses - either simple ones (just address to name) or random changing ones like Apple devices using irk keys"""
def __init__(self, irk_to_devicename: Dict[str, str], address_to_name: Dict[str, str]):
"""
address_to_name: dictionary from bt address as string separated by ":" to a device name
irk_to_devicename is dict with irk as a hex string, mapping to device name
"""
self.irk_to_devicename = {bytes.fromhex(k): v for k, v in irk_to_devicename.items()}
self.address_to_name = address_to_name
def _resolve_rpa(rpa: bytes, irk: bytes) -> bool:
"""Compares the random address rpa to an irk (secret key) and return True if it matches"""
assert len(rpa) == 6
assert len(irk) == 16
key = irk
plain_text = b"\x00" * 16
plain_text = bytearray(plain_text)
plain_text[15] = rpa[3]
plain_text[14] = rpa[4]
plain_text[13] = rpa[5]
plain_text = bytes(plain_text)
cipher = AES.new(key, AES.MODE_ECB)
cipher_text = cipher.encrypt(plain_text)
return cipher_text[15] == rpa[0] and cipher_text[14] == rpa[1] and cipher_text[13] == rpa[2]
def _addr_to_bytes(addr: str) -> bytes:
"""Converts a bluetooth mac address string with semicolons to bytes"""
str_without_colons = addr.replace(":", "")
bytearr = bytearray.fromhex(str_without_colons)
bytearr.reverse()
return bytes(bytearr)
def decode(self, addr: str) -> Optional[str]:
"""addr is a bluetooth address as a string e.g. 4d:24:12:12:34:10"""
for irk, name in self.irk_to_devicename.items():
if DeviceDecoder._resolve_rpa(DeviceDecoder._addr_to_bytes(addr), irk):
return name
return self.address_to_name.get(addr, None)
def __call__(self, m: BtleMeasurement) -> Optional[BtleDeviceMeasurement]:
decoded_device_name = self.decode(m.address)
if not decoded_device_name:
return None
return BtleDeviceMeasurement(m.time, decoded_device_name, m.tracker, m.rssi, m.tx_power)
# ------------------------------------------------------- MACHINE LEARNING ----------------------------------------------------------------
class KnownRoomCsvLogger:
"""Logs known room measurements to be used later as training data for classifier"""
def __init__(self, csv_file: Path):
self.known_room = None
if csv_file.exists():
self.csv_file_handle = open(csv_file, "a")
else:
self.csv_file_handle = open(csv_file, "w")
print(f"#time,device,tracker,rssi,tx_power,known_room", file=csv_file)
def update_known_room(self, known_room: str):
if known_room != self.known_room:
logging.info(f"Updating known_room {self.known_room} -> {known_room}")
self.known_room = known_room
def report_measure(self, m: BtleDeviceMeasurement):
ignore_rooms = ("keins", "?", "none", "unknown")
if self.known_room is None or self.known_room in ignore_rooms:
return
logging.info(f"Appending to training set: {m}")
print(
f"{m.time},{m.device},{m.tracker},{m.rssi},{m.tx_power},{self.known_room}",
file=self.csv_file_handle,)
FAR_AWAY_FEATURE_VALUE = 1
def get_feature_value(rssi, tx_power):
"""Transforms rssi and tx power into a value between 0 and 1, where 0 is close and 1 is far away"""
MIN_RSSI = -90
MAX_TRANSFORMED_RSSI = 40
v = tx_power - rssi - MAX_TRANSFORMED_RSSI
if v < 0:
v = 0
return v / (-MIN_RSSI)
def training_data_from_df(df: pd.DataFrame, device: str):
"""Returns a feature matrix (num_measurement, num_trackers) and a label vector (both numeric) to be used in scikit learn"""
idx_to_tracker = dict(enumerate(df["tracker"].cat.categories))
tracker_to_idx = {v: k for k, v in idx_to_tracker.items()}
idx_to_room = dict(enumerate(df["known_room"].cat.categories))
room_to_idx = {v: k for k, v in idx_to_room.items()}
last_known_room = None
start_time = None
current_feature = [FAR_AWAY_FEATURE_VALUE] * len(idx_to_tracker)
features = []
labels = []
# Feature vectors - rssi column for each room
for i, row in df.iterrows():
time, device, tracker, rssi, tx_power, known_room = row
if device != device:
continue
if last_known_room != known_room:
start_time = time
last_known_room = known_room
tracker_idx = tracker_to_idx[tracker]
current_feature[tracker_idx] = get_feature_value(rssi, tx_power)
if time - start_time > 20: # Wait 20secs to have measurements from all trackers
features.append(copy(current_feature))
labels.append(room_to_idx[known_room])
return np.array(features), np.array(labels)
def load_measurements_from_csv(csv_file: Path) -> pd.DataFrame:
"""Load csv with training data into dataframe"""
def cleanup_column_name(col_name: str):
return col_name.replace("#", "").strip()
df = pd.read_csv(str(csv_file))
# String cleanup in column names and room names
df = df.rename(columns=cleanup_column_name)
df.map(lambda x: x.strip() if isinstance(x, str) else x)
df["tracker"] = df["tracker"].astype("category")
df["known_room"] = df["known_room"].astype("category")
df['device'] = df['device'].astype("category")
return df
async def send_discovery_messages(mqtt_client, device_names):
for device_name in device_names:
topic = f"homeassistant/sensor/my_btmonitor/{device_name}/config"
msg = {
"name": device_name,
"state_topic": f"my_btmonitor/ml/{device_name}",
"expire_after": 30,
"unique_id": device_name,
}
await mqtt_client.publish(topic, json.dumps(msg).encode(), retain=True)
async def async_main(
mqtt_info: MqttInfo,
trackers: List[str],
devices: List[str],
classifier,
device_decoder: DeviceDecoder,
training_data_logger: KnownRoomCsvLogger,
):
feature_vecs_per_device = defaultdict(lambda: [FAR_AWAY_FEATURE_VALUE] * len(trackers))
current_rooms = defaultdict(lambda: "unknown")
tracker_name_to_idx = {name: i for i, name in enumerate(trackers)}
async with aiomqtt.Client(
hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password
) as client:
await send_discovery_messages(client, devices)
await client.subscribe("my_btmonitor/#")
async for message in client.messages:
current_time = time()
topic = message.topic
if topic.value == "my_btmonitor/known_room":
training_data_logger.update_known_room(message.payload.decode())
else:
splitted_topic = message.topic.value.split("/")
if splitted_topic[0] == "my_btmonitor" and splitted_topic[1] == "raw_measurements":
msg_json = json.loads(message.payload)
measurement = BtleMeasurement(
time=current_time,
tracker=splitted_topic[2],
address=msg_json["address"],
rssi=msg_json["rssi"],
tx_power=msg_json.get("tx_power", 0),
)
logging.debug(f"Got Measurement {measurement}")
m = device_decoder(measurement)
if m is not None:
logging.debug(f"Decoded Measurement {m}")
training_data_logger.report_measure(m)
tracker_idx = tracker_name_to_idx[m.tracker]
feature_vecs_per_device[m.device][tracker_idx] = get_feature_value(m.rssi, m.tx_power)
if classifier is not None:
room = classifier(m.device, feature_vecs_per_device[m.device])
if room != current_rooms[m.device]:
logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}")
current_rooms[m.device] = room
await client.publish(f"my_btmonitor/ml/{m.device}", room.encode())
def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=False):
devices_to_track = list(training_df["device"].unique())
classifiers = {}
rooms = list(training_df["known_room"].dtype.categories)
for device_to_track in devices_to_track:
features, labels = training_data_from_df(training_df, devices_to_track)
clf = svm.SVC(kernel="rbf")
logging.info(f"Computing cross validation score for {device_to_track}")
if log_classifier_scores:
scores = cross_val_score(clf, features, labels, cv=5)
logging.info(" %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))
logging.info(f"Training SVM classifier for {device_to_track}")
clf.fit(features, labels)
classifiers[device_to_track] = clf
def classify(device_name, feature_vec):
room_idx = classifiers[device_name].predict([feature_vec])[0]
return rooms[room_idx]
return classify
if __name__ == "__main__":
mqtt_info = MqttInfo(server="homeassistant.fritz.box", username="my_btmonitor", password="8aBIAC14jaKKbla")
# Dict with bt addresses as strings to device name
address_to_name = {}
# Devices with random addresses - need irk key
irk_to_devicename = {
"aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch",
"840e3892644c1ebd1594a9069c14ce0d": "martins_iphone",
}
data_file = Path("training_data.csv")
training_df = load_measurements_from_csv(data_file)
classification_func = get_classification_func(training_df)
training_data_logger = KnownRoomCsvLogger(data_file)
device_decoder = DeviceDecoder(irk_to_devicename, address_to_name)
trackers = list(training_df["tracker"].cat.categories)
devices = list(training_df['device'].cat.categories)
asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger))

6
docker-compose.yml Normal file
View File

@ -0,0 +1,6 @@
services:
bt_monitor_server:
build: .
volumes:
- .:/opt/bt_monitor_server

7
requirements.txt Normal file
View File

@ -0,0 +1,7 @@
aiomqtt==2.0.0
numpy==1.26.4
pandas==2.2.1
pycryptodome==3.20.0
scikit-learn==1.4.1.post1
scipy==1.12.0
typing_extensions==4.10.0

27855
training_data.csv Normal file

File diff suppressed because it is too large Load Diff