Fix: remove old values from feature values

This commit is contained in:
core 2024-03-11 22:39:11 +01:00
parent 4885497069
commit fbd53e2d5b
2 changed files with 1668 additions and 35 deletions

View File

@ -1,16 +1,16 @@
#!/usr/bin/env python3
import os
import aiomqtt
import json
import asyncio
from time import time
from pathlib import Path
from collections import namedtuple, defaultdict
from collections import namedtuple, defaultdict, deque
from typing import Dict, Optional, List
from Crypto.Cipher import AES
import pandas as pd
import numpy as np
from copy import copy
import logging
from sklearn import svm
from sklearn.model_selection import cross_val_score
@ -103,46 +103,70 @@ class KnownRoomCsvLogger:
file=self.csv_file_handle,)
FAR_AWAY_FEATURE_VALUE = 1
class RunningFeatureVector:
FAR_AWAY_FEATURE_VALUE = 1
MIN_TIME_UNTIL_PREDICTION = 40 # wait until every reachable tracker detected the device
TIME_TO_DELETE_IF_NOT_SEEN = 30 # if device wasn't spotted for this time period, the measure is set to inf
def __init__(self, trackers: List[str]):
self.trackers = trackers
self.feature_vecs_per_device = defaultdict(lambda: [self.FAR_AWAY_FEATURE_VALUE] * len(trackers))
self.last_measurements = deque()
self.tracker_name_to_idx = {name: i for i, name in enumerate(trackers)}
self.start_time = None
@staticmethod
def _get_feature_value(rssi, tx_power):
"""Transforms rssi and tx power into a value between 0 and 1, where 0 is close and 1 is far away"""
MIN_RSSI = -90
MAX_TRANSFORMED_RSSI = 40
v = tx_power - rssi - MAX_TRANSFORMED_RSSI
if v < 0:
v = 0
return v / (-MIN_RSSI)
def add_measurement(self, new_measurement: BtleDeviceMeasurement):
if self.start_time is None:
self.start_time = new_measurement.time
self.last_measurements.append(new_measurement)
while len(self.last_measurements) > 0 and new_measurement.time - self.last_measurements[0].time > self.TIME_TO_DELETE_IF_NOT_SEEN:
self.last_measurements.popleft()
feature_vec = [self.FAR_AWAY_FEATURE_VALUE] * len(self.trackers)
for m in self.last_measurements:
if m.device == new_measurement.device:
tracker_idx = self.tracker_name_to_idx[m.tracker]
feature_vec[tracker_idx] = self._get_feature_value(m.rssi, m.tx_power)
return feature_vec if new_measurement.time - self.start_time > self.MIN_TIME_UNTIL_PREDICTION else None
def get_feature_value(rssi, tx_power):
"""Transforms rssi and tx power into a value between 0 and 1, where 0 is close and 1 is far away"""
MIN_RSSI = -90
MAX_TRANSFORMED_RSSI = 40
v = tx_power - rssi - MAX_TRANSFORMED_RSSI
if v < 0:
v = 0
return v / (-MIN_RSSI)
def training_data_from_df(df: pd.DataFrame, device: str):
def training_data_from_df(df: pd.DataFrame, device_to_train: str):
"""Returns a feature matrix (num_measurement, num_trackers) and a label vector (both numeric) to be used in scikit learn"""
idx_to_tracker = dict(enumerate(df["tracker"].cat.categories))
tracker_to_idx = {v: k for k, v in idx_to_tracker.items()}
trackers = list(df["tracker"].cat.categories)
idx_to_room = dict(enumerate(df["known_room"].cat.categories))
room_to_idx = {v: k for k, v in idx_to_room.items()}
last_known_room = None
start_time = None
current_feature = [FAR_AWAY_FEATURE_VALUE] * len(idx_to_tracker)
features = []
labels = []
feature_accumulator = RunningFeatureVector(trackers)
# Feature vectors - rssi column for each room
for i, row in df.iterrows():
time, device, tracker, rssi, tx_power, known_room = row
if device != device:
m = BtleDeviceMeasurement(time, device, tracker, rssi, tx_power)
if device != device_to_train:
continue
if last_known_room != known_room:
start_time = time
feature_accumulator = RunningFeatureVector(trackers) # reset for new room
last_known_room = known_room
tracker_idx = tracker_to_idx[tracker]
current_feature[tracker_idx] = get_feature_value(rssi, tx_power)
if time - start_time > 20: # Wait 20secs to have measurements from all trackers
features.append(copy(current_feature))
feature_vec = feature_accumulator.add_measurement(m)
if feature_vec is not None:
features.append(feature_vec)
labels.append(room_to_idx[known_room])
return np.array(features), np.array(labels)
@ -187,9 +211,8 @@ async def async_main(
device_decoder: DeviceDecoder,
training_data_logger: KnownRoomCsvLogger,
):
feature_vecs_per_device = defaultdict(lambda: [FAR_AWAY_FEATURE_VALUE] * len(trackers))
current_rooms = defaultdict(lambda: "unknown")
tracker_name_to_idx = {name: i for i, name in enumerate(trackers)}
feature_accumulator = RunningFeatureVector(trackers)
async with aiomqtt.Client(
hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password
) as client:
@ -216,22 +239,24 @@ async def async_main(
if m is not None:
logging.debug(f"Decoded Measurement {m}")
training_data_logger.report_measure(m)
tracker_idx = tracker_name_to_idx[m.tracker]
feature_vecs_per_device[m.device][tracker_idx] = get_feature_value(m.rssi, m.tx_power)
if classifier is not None:
room = classifier(m.device, feature_vecs_per_device[m.device])
feature_vec =feature_accumulator.add_measurement(m)
if feature_vec:
feature_str={tracker : value for tracker, value in zip(trackers, feature_vec)}
logging.debug(f"Features: {feature_str}")
if feature_vec is not None and classifier is not None:
room = classifier(m.device, feature_vec)
if room != current_rooms[m.device]:
logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}")
current_rooms[m.device] = room
await client.publish(f"my_btmonitor/ml/{m.device}", room.encode())
def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=False):
def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=True):
devices_to_track = list(training_df["device"].unique())
classifiers = {}
rooms = list(training_df["known_room"].dtype.categories)
for device_to_track in devices_to_track:
features, labels = training_data_from_df(training_df, devices_to_track)
features, labels = training_data_from_df(training_df, device_to_track)
clf = svm.SVC(kernel="rbf")
logging.info(f"Computing cross validation score for {device_to_track}")
if log_classifier_scores:
@ -258,8 +283,8 @@ if __name__ == "__main__":
"aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch",
"840e3892644c1ebd1594a9069c14ce0d": "martins_iphone",
}
data_file = Path("training_data.csv")
script_path = os.path.dirname(os.path.realpath(__file__))
data_file = Path(script_path) / Path("training_data.csv")
training_df = load_measurements_from_csv(data_file)
classification_func = get_classification_func(training_df)
training_data_logger = KnownRoomCsvLogger(data_file)
@ -267,3 +292,4 @@ if __name__ == "__main__":
trackers = list(training_df["tracker"].cat.categories)
devices = list(training_df['device'].cat.categories)
asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger))

File diff suppressed because it is too large Load Diff