Fix: remove old values from feature values
This commit is contained in:
parent
4885497069
commit
fbd53e2d5b
|
@ -1,16 +1,16 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
import aiomqtt
|
import aiomqtt
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
from time import time
|
from time import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import namedtuple, defaultdict
|
from collections import namedtuple, defaultdict, deque
|
||||||
from typing import Dict, Optional, List
|
from typing import Dict, Optional, List
|
||||||
from Crypto.Cipher import AES
|
from Crypto.Cipher import AES
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from copy import copy
|
|
||||||
import logging
|
import logging
|
||||||
from sklearn import svm
|
from sklearn import svm
|
||||||
from sklearn.model_selection import cross_val_score
|
from sklearn.model_selection import cross_val_score
|
||||||
|
@ -103,46 +103,70 @@ class KnownRoomCsvLogger:
|
||||||
file=self.csv_file_handle,)
|
file=self.csv_file_handle,)
|
||||||
|
|
||||||
|
|
||||||
FAR_AWAY_FEATURE_VALUE = 1
|
class RunningFeatureVector:
|
||||||
|
FAR_AWAY_FEATURE_VALUE = 1
|
||||||
|
MIN_TIME_UNTIL_PREDICTION = 40 # wait until every reachable tracker detected the device
|
||||||
|
TIME_TO_DELETE_IF_NOT_SEEN = 30 # if device wasn't spotted for this time period, the measure is set to inf
|
||||||
|
|
||||||
|
def __init__(self, trackers: List[str]):
|
||||||
|
self.trackers = trackers
|
||||||
|
self.feature_vecs_per_device = defaultdict(lambda: [self.FAR_AWAY_FEATURE_VALUE] * len(trackers))
|
||||||
|
self.last_measurements = deque()
|
||||||
|
self.tracker_name_to_idx = {name: i for i, name in enumerate(trackers)}
|
||||||
|
self.start_time = None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_feature_value(rssi, tx_power):
|
||||||
|
"""Transforms rssi and tx power into a value between 0 and 1, where 0 is close and 1 is far away"""
|
||||||
|
MIN_RSSI = -90
|
||||||
|
MAX_TRANSFORMED_RSSI = 40
|
||||||
|
v = tx_power - rssi - MAX_TRANSFORMED_RSSI
|
||||||
|
if v < 0:
|
||||||
|
v = 0
|
||||||
|
return v / (-MIN_RSSI)
|
||||||
|
|
||||||
|
def add_measurement(self, new_measurement: BtleDeviceMeasurement):
|
||||||
|
if self.start_time is None:
|
||||||
|
self.start_time = new_measurement.time
|
||||||
|
|
||||||
|
self.last_measurements.append(new_measurement)
|
||||||
|
while len(self.last_measurements) > 0 and new_measurement.time - self.last_measurements[0].time > self.TIME_TO_DELETE_IF_NOT_SEEN:
|
||||||
|
self.last_measurements.popleft()
|
||||||
|
|
||||||
|
feature_vec = [self.FAR_AWAY_FEATURE_VALUE] * len(self.trackers)
|
||||||
|
for m in self.last_measurements:
|
||||||
|
if m.device == new_measurement.device:
|
||||||
|
tracker_idx = self.tracker_name_to_idx[m.tracker]
|
||||||
|
feature_vec[tracker_idx] = self._get_feature_value(m.rssi, m.tx_power)
|
||||||
|
return feature_vec if new_measurement.time - self.start_time > self.MIN_TIME_UNTIL_PREDICTION else None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_feature_value(rssi, tx_power):
|
def training_data_from_df(df: pd.DataFrame, device_to_train: str):
|
||||||
"""Transforms rssi and tx power into a value between 0 and 1, where 0 is close and 1 is far away"""
|
|
||||||
MIN_RSSI = -90
|
|
||||||
MAX_TRANSFORMED_RSSI = 40
|
|
||||||
v = tx_power - rssi - MAX_TRANSFORMED_RSSI
|
|
||||||
if v < 0:
|
|
||||||
v = 0
|
|
||||||
return v / (-MIN_RSSI)
|
|
||||||
|
|
||||||
|
|
||||||
def training_data_from_df(df: pd.DataFrame, device: str):
|
|
||||||
"""Returns a feature matrix (num_measurement, num_trackers) and a label vector (both numeric) to be used in scikit learn"""
|
"""Returns a feature matrix (num_measurement, num_trackers) and a label vector (both numeric) to be used in scikit learn"""
|
||||||
idx_to_tracker = dict(enumerate(df["tracker"].cat.categories))
|
trackers = list(df["tracker"].cat.categories)
|
||||||
tracker_to_idx = {v: k for k, v in idx_to_tracker.items()}
|
|
||||||
idx_to_room = dict(enumerate(df["known_room"].cat.categories))
|
idx_to_room = dict(enumerate(df["known_room"].cat.categories))
|
||||||
room_to_idx = {v: k for k, v in idx_to_room.items()}
|
room_to_idx = {v: k for k, v in idx_to_room.items()}
|
||||||
|
|
||||||
last_known_room = None
|
last_known_room = None
|
||||||
start_time = None
|
|
||||||
current_feature = [FAR_AWAY_FEATURE_VALUE] * len(idx_to_tracker)
|
|
||||||
|
|
||||||
features = []
|
features = []
|
||||||
labels = []
|
labels = []
|
||||||
|
|
||||||
|
feature_accumulator = RunningFeatureVector(trackers)
|
||||||
|
|
||||||
# Feature vectors - rssi column for each room
|
# Feature vectors - rssi column for each room
|
||||||
for i, row in df.iterrows():
|
for i, row in df.iterrows():
|
||||||
time, device, tracker, rssi, tx_power, known_room = row
|
time, device, tracker, rssi, tx_power, known_room = row
|
||||||
if device != device:
|
m = BtleDeviceMeasurement(time, device, tracker, rssi, tx_power)
|
||||||
|
if device != device_to_train:
|
||||||
continue
|
continue
|
||||||
if last_known_room != known_room:
|
if last_known_room != known_room:
|
||||||
start_time = time
|
feature_accumulator = RunningFeatureVector(trackers) # reset for new room
|
||||||
last_known_room = known_room
|
last_known_room = known_room
|
||||||
|
feature_vec = feature_accumulator.add_measurement(m)
|
||||||
tracker_idx = tracker_to_idx[tracker]
|
if feature_vec is not None:
|
||||||
current_feature[tracker_idx] = get_feature_value(rssi, tx_power)
|
features.append(feature_vec)
|
||||||
if time - start_time > 20: # Wait 20secs to have measurements from all trackers
|
|
||||||
features.append(copy(current_feature))
|
|
||||||
labels.append(room_to_idx[known_room])
|
labels.append(room_to_idx[known_room])
|
||||||
|
|
||||||
return np.array(features), np.array(labels)
|
return np.array(features), np.array(labels)
|
||||||
|
@ -187,9 +211,8 @@ async def async_main(
|
||||||
device_decoder: DeviceDecoder,
|
device_decoder: DeviceDecoder,
|
||||||
training_data_logger: KnownRoomCsvLogger,
|
training_data_logger: KnownRoomCsvLogger,
|
||||||
):
|
):
|
||||||
feature_vecs_per_device = defaultdict(lambda: [FAR_AWAY_FEATURE_VALUE] * len(trackers))
|
|
||||||
current_rooms = defaultdict(lambda: "unknown")
|
current_rooms = defaultdict(lambda: "unknown")
|
||||||
tracker_name_to_idx = {name: i for i, name in enumerate(trackers)}
|
feature_accumulator = RunningFeatureVector(trackers)
|
||||||
async with aiomqtt.Client(
|
async with aiomqtt.Client(
|
||||||
hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password
|
hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password
|
||||||
) as client:
|
) as client:
|
||||||
|
@ -216,22 +239,24 @@ async def async_main(
|
||||||
if m is not None:
|
if m is not None:
|
||||||
logging.debug(f"Decoded Measurement {m}")
|
logging.debug(f"Decoded Measurement {m}")
|
||||||
training_data_logger.report_measure(m)
|
training_data_logger.report_measure(m)
|
||||||
tracker_idx = tracker_name_to_idx[m.tracker]
|
feature_vec =feature_accumulator.add_measurement(m)
|
||||||
feature_vecs_per_device[m.device][tracker_idx] = get_feature_value(m.rssi, m.tx_power)
|
if feature_vec:
|
||||||
if classifier is not None:
|
feature_str={tracker : value for tracker, value in zip(trackers, feature_vec)}
|
||||||
room = classifier(m.device, feature_vecs_per_device[m.device])
|
logging.debug(f"Features: {feature_str}")
|
||||||
|
if feature_vec is not None and classifier is not None:
|
||||||
|
room = classifier(m.device, feature_vec)
|
||||||
if room != current_rooms[m.device]:
|
if room != current_rooms[m.device]:
|
||||||
logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}")
|
logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}")
|
||||||
current_rooms[m.device] = room
|
current_rooms[m.device] = room
|
||||||
await client.publish(f"my_btmonitor/ml/{m.device}", room.encode())
|
await client.publish(f"my_btmonitor/ml/{m.device}", room.encode())
|
||||||
|
|
||||||
|
|
||||||
def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=False):
|
def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=True):
|
||||||
devices_to_track = list(training_df["device"].unique())
|
devices_to_track = list(training_df["device"].unique())
|
||||||
classifiers = {}
|
classifiers = {}
|
||||||
rooms = list(training_df["known_room"].dtype.categories)
|
rooms = list(training_df["known_room"].dtype.categories)
|
||||||
for device_to_track in devices_to_track:
|
for device_to_track in devices_to_track:
|
||||||
features, labels = training_data_from_df(training_df, devices_to_track)
|
features, labels = training_data_from_df(training_df, device_to_track)
|
||||||
clf = svm.SVC(kernel="rbf")
|
clf = svm.SVC(kernel="rbf")
|
||||||
logging.info(f"Computing cross validation score for {device_to_track}")
|
logging.info(f"Computing cross validation score for {device_to_track}")
|
||||||
if log_classifier_scores:
|
if log_classifier_scores:
|
||||||
|
@ -258,8 +283,8 @@ if __name__ == "__main__":
|
||||||
"aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch",
|
"aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch",
|
||||||
"840e3892644c1ebd1594a9069c14ce0d": "martins_iphone",
|
"840e3892644c1ebd1594a9069c14ce0d": "martins_iphone",
|
||||||
}
|
}
|
||||||
|
script_path = os.path.dirname(os.path.realpath(__file__))
|
||||||
data_file = Path("training_data.csv")
|
data_file = Path(script_path) / Path("training_data.csv")
|
||||||
training_df = load_measurements_from_csv(data_file)
|
training_df = load_measurements_from_csv(data_file)
|
||||||
classification_func = get_classification_func(training_df)
|
classification_func = get_classification_func(training_df)
|
||||||
training_data_logger = KnownRoomCsvLogger(data_file)
|
training_data_logger = KnownRoomCsvLogger(data_file)
|
||||||
|
@ -267,3 +292,4 @@ if __name__ == "__main__":
|
||||||
trackers = list(training_df["tracker"].cat.categories)
|
trackers = list(training_df["tracker"].cat.categories)
|
||||||
devices = list(training_df['device'].cat.categories)
|
devices = list(training_df['device'].cat.categories)
|
||||||
asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger))
|
asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger))
|
||||||
|
|
||||||
|
|
1607
training_data.csv
1607
training_data.csv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue