Fix: remove old values from feature values

This commit is contained in:
core 2024-03-11 22:39:11 +01:00
parent 4885497069
commit fbd53e2d5b
2 changed files with 1668 additions and 35 deletions

View File

@ -1,16 +1,16 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os
import aiomqtt import aiomqtt
import json import json
import asyncio import asyncio
from time import time from time import time
from pathlib import Path from pathlib import Path
from collections import namedtuple, defaultdict from collections import namedtuple, defaultdict, deque
from typing import Dict, Optional, List from typing import Dict, Optional, List
from Crypto.Cipher import AES from Crypto.Cipher import AES
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from copy import copy
import logging import logging
from sklearn import svm from sklearn import svm
from sklearn.model_selection import cross_val_score from sklearn.model_selection import cross_val_score
@ -103,46 +103,70 @@ class KnownRoomCsvLogger:
file=self.csv_file_handle,) file=self.csv_file_handle,)
class RunningFeatureVector:
    """Sliding-window accumulator that turns BLE measurements into feature vectors.

    A feature vector has one slot per tracker, in the order of ``trackers``.
    Each slot holds the transformed signal strength of the most recent
    measurement that tracker reported for a device inside the time window, or
    ``FAR_AWAY_FEATURE_VALUE`` when the tracker has no measurement for that
    device left in the window.
    """

    # Slot value used when a tracker has no recent measurement for a device.
    FAR_AWAY_FEATURE_VALUE = 1
    # No vector is returned until this much time has passed since the first
    # measurement, so that every reachable tracker had a chance to detect the
    # device. Same unit as ``measurement.time`` (presumably seconds).
    MIN_TIME_UNTIL_PREDICTION = 40
    # Measurements older than this (relative to the newest measurement) are
    # dropped, which makes the slot fall back to FAR_AWAY_FEATURE_VALUE.
    TIME_TO_DELETE_IF_NOT_SEEN = 30

    def __init__(self, trackers: List[str]):
        """Create an accumulator for the given ordered list of tracker names."""
        self.trackers = trackers
        # Not read anywhere in this class; kept so existing attribute access
        # from outside keeps working.
        self.feature_vecs_per_device = defaultdict(
            lambda: [self.FAR_AWAY_FEATURE_VALUE] * len(trackers)
        )
        # Measurements of all devices, oldest first.
        self.last_measurements = deque()
        self.tracker_name_to_idx = {name: i for i, name in enumerate(trackers)}
        # Time of the very first measurement; set lazily in add_measurement.
        self.start_time = None

    @staticmethod
    def _get_feature_value(rssi, tx_power):
        """Map rssi and tx power to a distance-like value (0 = close).

        The result is ``max(tx_power - rssi - 40, 0) / 90``. It is clamped at
        0 but not at the upper end, so it only exceeds 1 when
        ``tx_power - rssi`` is larger than 130.
        """
        MIN_RSSI = -90
        MAX_TRANSFORMED_RSSI = 40
        v = max(tx_power - rssi - MAX_TRANSFORMED_RSSI, 0)
        return v / (-MIN_RSSI)

    def add_measurement(self, new_measurement: "BtleDeviceMeasurement"):
        """Record a measurement and return the feature vector of its device.

        Returns ``None`` while less than ``MIN_TIME_UNTIL_PREDICTION`` has
        elapsed since the first measurement. Otherwise returns a list with one
        value per tracker. Measurements from trackers that are not in
        ``trackers`` are ignored when building the vector instead of raising
        ``KeyError``.
        """
        if self.start_time is None:
            self.start_time = new_measurement.time
        self.last_measurements.append(new_measurement)

        # Expire everything that fell out of the time window. The window is
        # shared by all devices and advances with every incoming measurement.
        newest_time = new_measurement.time
        while (
            self.last_measurements
            and newest_time - self.last_measurements[0].time > self.TIME_TO_DELETE_IF_NOT_SEEN
        ):
            self.last_measurements.popleft()

        feature_vec = [self.FAR_AWAY_FEATURE_VALUE] * len(self.trackers)
        # Iterating oldest -> newest lets the latest measurement per tracker win.
        for m in self.last_measurements:
            if m.device != new_measurement.device:
                continue
            tracker_idx = self.tracker_name_to_idx.get(m.tracker)
            if tracker_idx is None:
                # Tracker was not part of the configured list: skip it rather
                # than crash the caller's processing loop.
                continue
            feature_vec[tracker_idx] = self._get_feature_value(m.rssi, m.tx_power)

        if newest_time - self.start_time > self.MIN_TIME_UNTIL_PREDICTION:
            return feature_vec
        return None
def training_data_from_df(df: pd.DataFrame, device_to_train: str):
    """Build scikit-learn training data for one device from the measurement table.

    Walks the rows of ``df`` in order, feeds the rows belonging to
    ``device_to_train`` into a ``RunningFeatureVector`` and collects every
    feature vector it yields together with the numeric index of the row's
    known room. The accumulator is restarted whenever the known room changes.

    Returns a tuple ``(features, labels)`` of numpy arrays: a feature matrix
    (num_measurement, num_trackers) and a label vector, both numeric.
    """
    tracker_names = list(df["tracker"].cat.categories)
    # Numeric label of a room = its position in the categorical's categories.
    room_to_idx = {
        room: idx for idx, room in enumerate(df["known_room"].cat.categories)
    }

    features = []
    labels = []
    previous_room = None
    accumulator = RunningFeatureVector(tracker_names)

    # Feature vectors - rssi column for each room
    for _, row in df.iterrows():
        timestamp, device, tracker, rssi, tx_power, known_room = row
        measurement = BtleDeviceMeasurement(timestamp, device, tracker, rssi, tx_power)
        if device != device_to_train:
            continue

        if previous_room != known_room:
            # reset for new room
            accumulator = RunningFeatureVector(tracker_names)
            previous_room = known_room

        vec = accumulator.add_measurement(measurement)
        if vec is None:
            # Accumulator is still in its warm-up phase for this room.
            continue
        features.append(vec)
        labels.append(room_to_idx[known_room])

    return np.array(features), np.array(labels)
@ -187,9 +211,8 @@ async def async_main(
device_decoder: DeviceDecoder, device_decoder: DeviceDecoder,
training_data_logger: KnownRoomCsvLogger, training_data_logger: KnownRoomCsvLogger,
): ):
feature_vecs_per_device = defaultdict(lambda: [FAR_AWAY_FEATURE_VALUE] * len(trackers))
current_rooms = defaultdict(lambda: "unknown") current_rooms = defaultdict(lambda: "unknown")
tracker_name_to_idx = {name: i for i, name in enumerate(trackers)} feature_accumulator = RunningFeatureVector(trackers)
async with aiomqtt.Client( async with aiomqtt.Client(
hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password
) as client: ) as client:
@ -216,22 +239,24 @@ async def async_main(
if m is not None: if m is not None:
logging.debug(f"Decoded Measurement {m}") logging.debug(f"Decoded Measurement {m}")
training_data_logger.report_measure(m) training_data_logger.report_measure(m)
tracker_idx = tracker_name_to_idx[m.tracker] feature_vec =feature_accumulator.add_measurement(m)
feature_vecs_per_device[m.device][tracker_idx] = get_feature_value(m.rssi, m.tx_power) if feature_vec:
if classifier is not None: feature_str={tracker : value for tracker, value in zip(trackers, feature_vec)}
room = classifier(m.device, feature_vecs_per_device[m.device]) logging.debug(f"Features: {feature_str}")
if feature_vec is not None and classifier is not None:
room = classifier(m.device, feature_vec)
if room != current_rooms[m.device]: if room != current_rooms[m.device]:
logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}") logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}")
current_rooms[m.device] = room current_rooms[m.device] = room
await client.publish(f"my_btmonitor/ml/{m.device}", room.encode()) await client.publish(f"my_btmonitor/ml/{m.device}", room.encode())
def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=False): def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=True):
devices_to_track = list(training_df["device"].unique()) devices_to_track = list(training_df["device"].unique())
classifiers = {} classifiers = {}
rooms = list(training_df["known_room"].dtype.categories) rooms = list(training_df["known_room"].dtype.categories)
for device_to_track in devices_to_track: for device_to_track in devices_to_track:
features, labels = training_data_from_df(training_df, devices_to_track) features, labels = training_data_from_df(training_df, device_to_track)
clf = svm.SVC(kernel="rbf") clf = svm.SVC(kernel="rbf")
logging.info(f"Computing cross validation score for {device_to_track}") logging.info(f"Computing cross validation score for {device_to_track}")
if log_classifier_scores: if log_classifier_scores:
@ -258,8 +283,8 @@ if __name__ == "__main__":
"aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch", "aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch",
"840e3892644c1ebd1594a9069c14ce0d": "martins_iphone", "840e3892644c1ebd1594a9069c14ce0d": "martins_iphone",
} }
script_path = os.path.dirname(os.path.realpath(__file__))
data_file = Path("training_data.csv") data_file = Path(script_path) / Path("training_data.csv")
training_df = load_measurements_from_csv(data_file) training_df = load_measurements_from_csv(data_file)
classification_func = get_classification_func(training_df) classification_func = get_classification_func(training_df)
training_data_logger = KnownRoomCsvLogger(data_file) training_data_logger = KnownRoomCsvLogger(data_file)
@ -267,3 +292,4 @@ if __name__ == "__main__":
trackers = list(training_df["tracker"].cat.categories) trackers = list(training_df["tracker"].cat.categories)
devices = list(training_df['device'].cat.categories) devices = list(training_df['device'].cat.categories)
asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger)) asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger))

File diff suppressed because it is too large Load Diff