Fix: remove old values from feature values
This commit is contained in:
		
							parent
							
								
									4885497069
								
							
						
					
					
						commit
						fbd53e2d5b
					
				|  | @ -1,16 +1,16 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| import os | ||||
| import aiomqtt | ||||
| import json | ||||
| import asyncio | ||||
| from time import time | ||||
| from pathlib import Path | ||||
| from collections import namedtuple, defaultdict | ||||
| from collections import namedtuple, defaultdict, deque | ||||
| from typing import Dict, Optional, List | ||||
| from Crypto.Cipher import AES | ||||
| import pandas as pd | ||||
| import numpy as np | ||||
| from copy import copy | ||||
| import logging | ||||
| from sklearn import svm | ||||
| from sklearn.model_selection import cross_val_score | ||||
|  | @ -103,10 +103,20 @@ class KnownRoomCsvLogger: | |||
|             file=self.csv_file_handle,) | ||||
| 
 | ||||
| 
 | ||||
| FAR_AWAY_FEATURE_VALUE = 1 | ||||
| class RunningFeatureVector: | ||||
|     FAR_AWAY_FEATURE_VALUE = 1  | ||||
|     MIN_TIME_UNTIL_PREDICTION = 40 # wait until every reachable tracker detected the device | ||||
|     TIME_TO_DELETE_IF_NOT_SEEN = 30 # if device wasn't spotted for this time period, the measure is set to inf | ||||
| 
 | ||||
|     def __init__(self, trackers: List[str]): | ||||
|         self.trackers = trackers | ||||
|         self.feature_vecs_per_device = defaultdict(lambda: [self.FAR_AWAY_FEATURE_VALUE] * len(trackers)) | ||||
|         self.last_measurements = deque() | ||||
|         self.tracker_name_to_idx = {name: i for i, name in enumerate(trackers)} | ||||
|         self.start_time = None | ||||
| 
 | ||||
| def get_feature_value(rssi, tx_power): | ||||
|     @staticmethod | ||||
|     def _get_feature_value(rssi, tx_power): | ||||
|         """Transforms rssi and tx power into a value between 0 and 1, where 0 is close and 1 is far away""" | ||||
|         MIN_RSSI = -90 | ||||
|         MAX_TRANSFORMED_RSSI = 40 | ||||
|  | @ -115,34 +125,48 @@ def get_feature_value(rssi, tx_power): | |||
|             v = 0 | ||||
|         return v / (-MIN_RSSI) | ||||
| 
 | ||||
|     def add_measurement(self, new_measurement: BtleDeviceMeasurement): | ||||
|         if self.start_time is None: | ||||
|             self.start_time = new_measurement.time | ||||
| 
 | ||||
| def training_data_from_df(df: pd.DataFrame, device: str): | ||||
|         self.last_measurements.append(new_measurement) | ||||
|         while len(self.last_measurements) > 0 and new_measurement.time - self.last_measurements[0].time > self.TIME_TO_DELETE_IF_NOT_SEEN: | ||||
|             self.last_measurements.popleft() | ||||
| 
 | ||||
|         feature_vec = [self.FAR_AWAY_FEATURE_VALUE] * len(self.trackers) | ||||
|         for m in self.last_measurements:  | ||||
|             if m.device == new_measurement.device: | ||||
|                 tracker_idx = self.tracker_name_to_idx[m.tracker] | ||||
|                 feature_vec[tracker_idx] = self._get_feature_value(m.rssi, m.tx_power) | ||||
|         return feature_vec if new_measurement.time - self.start_time > self.MIN_TIME_UNTIL_PREDICTION else None | ||||
|      | ||||
| 
 | ||||
| 
 | ||||
| def training_data_from_df(df: pd.DataFrame, device_to_train: str): | ||||
|     """Returns a feature matrix (num_measurement, num_trackers) and a label vector (both numeric) to be used in scikit learn""" | ||||
|     idx_to_tracker = dict(enumerate(df["tracker"].cat.categories)) | ||||
|     tracker_to_idx = {v: k for k, v in idx_to_tracker.items()} | ||||
|     trackers = list(df["tracker"].cat.categories) | ||||
|     idx_to_room = dict(enumerate(df["known_room"].cat.categories)) | ||||
|     room_to_idx = {v: k for k, v in idx_to_room.items()} | ||||
| 
 | ||||
|     last_known_room = None | ||||
|     start_time = None | ||||
|     current_feature = [FAR_AWAY_FEATURE_VALUE] * len(idx_to_tracker) | ||||
| 
 | ||||
|     features = [] | ||||
|     labels = [] | ||||
|      | ||||
|     feature_accumulator = RunningFeatureVector(trackers) | ||||
| 
 | ||||
|     # Feature vectors - rssi column for each room | ||||
|     for i, row in df.iterrows(): | ||||
|         time, device, tracker, rssi, tx_power, known_room = row | ||||
|         if device != device: | ||||
|         m = BtleDeviceMeasurement(time, device, tracker, rssi, tx_power) | ||||
|         if device != device_to_train: | ||||
|             continue | ||||
|         if last_known_room != known_room: | ||||
|             start_time = time | ||||
|             feature_accumulator = RunningFeatureVector(trackers) # reset for new room | ||||
|             last_known_room = known_room | ||||
| 
 | ||||
|         tracker_idx = tracker_to_idx[tracker] | ||||
|         current_feature[tracker_idx] = get_feature_value(rssi, tx_power) | ||||
|         if time - start_time > 20:  # Wait 20secs to have measurements from all trackers | ||||
|             features.append(copy(current_feature)) | ||||
|         feature_vec = feature_accumulator.add_measurement(m) | ||||
|         if feature_vec is not None: | ||||
|             features.append(feature_vec) | ||||
|             labels.append(room_to_idx[known_room]) | ||||
| 
 | ||||
|     return np.array(features), np.array(labels) | ||||
|  | @ -187,9 +211,8 @@ async def async_main( | |||
|     device_decoder: DeviceDecoder, | ||||
|     training_data_logger: KnownRoomCsvLogger, | ||||
| ): | ||||
|     feature_vecs_per_device = defaultdict(lambda: [FAR_AWAY_FEATURE_VALUE] * len(trackers)) | ||||
|     current_rooms = defaultdict(lambda: "unknown") | ||||
|     tracker_name_to_idx = {name: i for i, name in enumerate(trackers)} | ||||
|     feature_accumulator = RunningFeatureVector(trackers)     | ||||
|     async with aiomqtt.Client( | ||||
|         hostname=mqtt_info.server, username=mqtt_info.username, password=mqtt_info.password | ||||
|     ) as client: | ||||
|  | @ -216,22 +239,24 @@ async def async_main( | |||
|                     if m is not None: | ||||
|                         logging.debug(f"Decoded Measurement {m}") | ||||
|                         training_data_logger.report_measure(m) | ||||
|                         tracker_idx = tracker_name_to_idx[m.tracker] | ||||
|                         feature_vecs_per_device[m.device][tracker_idx] = get_feature_value(m.rssi, m.tx_power) | ||||
|                         if classifier is not None: | ||||
|                             room = classifier(m.device, feature_vecs_per_device[m.device]) | ||||
|                         feature_vec =feature_accumulator.add_measurement(m) | ||||
|                         if feature_vec: | ||||
|                             feature_str={tracker : value for tracker, value in zip(trackers, feature_vec)}  | ||||
|                             logging.debug(f"Features: {feature_str}") | ||||
|                         if feature_vec is not None and classifier is not None: | ||||
|                             room = classifier(m.device, feature_vec) | ||||
|                             if room != current_rooms[m.device]: | ||||
|                                 logging.info(f"{m.device} moved room {current_rooms[m.device]} to {room}") | ||||
|                                 current_rooms[m.device] = room | ||||
|                             await client.publish(f"my_btmonitor/ml/{m.device}", room.encode()) | ||||
| 
 | ||||
| 
 | ||||
| def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=False): | ||||
| def get_classification_func(training_df: pd.DataFrame, log_classifier_scores=True): | ||||
|     devices_to_track = list(training_df["device"].unique()) | ||||
|     classifiers = {} | ||||
|     rooms = list(training_df["known_room"].dtype.categories) | ||||
|     for device_to_track in devices_to_track: | ||||
|         features, labels = training_data_from_df(training_df, devices_to_track) | ||||
|         features, labels = training_data_from_df(training_df, device_to_track) | ||||
|         clf = svm.SVC(kernel="rbf") | ||||
|         logging.info(f"Computing cross validation score for {device_to_track}") | ||||
|         if log_classifier_scores: | ||||
|  | @ -258,8 +283,8 @@ if __name__ == "__main__": | |||
|         "aa67542b82c0e05d65c27fb7e313aba5": "martins_apple_watch", | ||||
|         "840e3892644c1ebd1594a9069c14ce0d": "martins_iphone", | ||||
|     } | ||||
| 
 | ||||
|     data_file = Path("training_data.csv") | ||||
|     script_path = os.path.dirname(os.path.realpath(__file__)) | ||||
|     data_file = Path(script_path) / Path("training_data.csv") | ||||
|     training_df = load_measurements_from_csv(data_file) | ||||
|     classification_func = get_classification_func(training_df) | ||||
|     training_data_logger = KnownRoomCsvLogger(data_file) | ||||
|  | @ -267,3 +292,4 @@ if __name__ == "__main__": | |||
|     trackers = list(training_df["tracker"].cat.categories) | ||||
|     devices = list(training_df['device'].cat.categories) | ||||
|     asyncio.run(async_main(mqtt_info, trackers, devices, classification_func, device_decoder, training_data_logger)) | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										1607
									
								
								training_data.csv
								
								
								
								
							
							
						
						
									
										1607
									
								
								training_data.csv
								
								
								
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue