Identifying multivariate outliers using Z-score method

Поделиться
HTML-код
  • Опубликовано: 12 сен 2024

Комментарии • 2

  • @KnowledgeAmplifier1
    @KnowledgeAmplifier1  3 года назад

    Code:
    % Flag multivariate outliers in a 2-D data set with the Z-score method:
    % every observation's Euclidean distance from the centroid is computed,
    % the distances are standardized with zscore, and observations whose
    % standardized distance exceeds a threshold are marked as outliers.
    clc
    clear all
    close all
    warning off
    % Sample data: one observation per row, one variable per column.
    a=[0.0005 0.2300
    0.3203 0.0215
    0.2403 0.1816
    0.0066 0.0421
    0.1091 0.1390
    0.0043 0.0167
    0.7709 0.0011
    0.2230 0.2208
    0.3366 0.0003
    0.3492 0.1406
    0.0168 0.3315
    0.0057 0.0514
    0.5466 0.0000
    0.1934 0.9979
    0.2996 0.0002
    0.0818 0.1906
    0.0431 0.0032
    0.5088 0.0071
    0.2352 0.1619
    0.0071 0.1223
    0.6208 0.0073
    0.0343 0.2519
    0.7166 0.0000
    0.2600 0.1111
    0.0018 0.0284
    0.8881 0.0059
    0.1800 0.0016
    0.0273 0.0067
    0.2173 0.0047
    0.0736 0.0001
    0.6168 0.0005
    0.1390 0.4156
    0.1200 0.0925
    0.0098 0.2469
    0.9363 0.0298
    0.0422 0.0024
    0.1237 0.2900
    0.6201 0.0005
    0.5890 0.0267
    0.0075 0.0976];
    scatter(a(:,1),a(:,2),'fill');
    % Centroid of the data (mean of each column).
    ms=[mean(a(:,1)),mean(a(:,2))];
    hold on;
    plot(ms(1),ms(2),'g+','linewidth',10);
    % Euclidean distance of every observation from the centroid, computed
    % for all rows at once; transposed to keep the original row-vector
    % orientation.
    distance_from_mean=hypot(a(:,1)-ms(1),a(:,2)-ms(2)).';
    % Standardize the distances so the threshold is expressed in standard
    % deviations above the mean distance rather than in raw data units.
    msa=zscore(distance_from_mean);
    thres=0.2;
    % Compare the z-scores (not the raw distances) against the threshold.
    index_position=find(msa>thres);
    plot(a(index_position,1),a(index_position,2),'ro','linewidth',5);
    legend('Data','Mean','Outlier');
    Code for GUI:
    function varargout = MV(varargin)
    % MV MATLAB code for MV.fig
    % MV, by itself, creates a new MV or raises the existing
    % singleton*.
    %
    % H = MV returns the handle to a new MV or the handle to
    % the existing singleton*.
    %
    % MV('CALLBACK',hObject,eventData,handles,...) calls the local
    % function named CALLBACK in MV.M with the given input arguments.
    %
    % MV('Property','Value',...) creates a new MV or raises the
    % existing singleton*. Starting from the left, property value pairs are
    % applied to the GUI before MV_OpeningFcn gets called. An
    % unrecognized property name or invalid value makes property application
    % stop. All inputs are passed to MV_OpeningFcn via varargin.
    %
    % *See GUI Options on GUIDE's Tools menu. Choose "GUI allows only one
    % instance to run (singleton)".
    %
    % See also: GUIDE, GUIDATA, GUIHANDLES
    % Edit the above text to modify the response to help MV
    % Last Modified by GUIDE v2.5 25-Oct-2020 09:53:16
    % Begin initialization code - DO NOT EDIT
    % Singleton flag passed to gui_mainfcn through gui_State.
    gui_Singleton = 1;
    % Descriptor handed to gui_mainfcn: the GUI name (this file's name), the
    % singleton flag, handles to the opening and output functions defined in
    % this file, and initially empty layout-function and callback slots.
    gui_State = struct('gui_Name', mfilename, ...
    'gui_Singleton', gui_Singleton, ...
    'gui_OpeningFcn', @MV_OpeningFcn, ...
    'gui_OutputFcn', @MV_OutputFcn, ...
    'gui_LayoutFcn', [] , ...
    'gui_Callback', []);
    % When the first input is a character array it is treated as the name of
    % a callback: it is converted to a function handle and stored in the
    % descriptor.
    if nargin && ischar(varargin{1})
    gui_State.gui_Callback = str2func(varargin{1});
    end
    % Forward outputs from gui_mainfcn only when the caller requested any.
    if nargout
    [varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
    else
    gui_mainfcn(gui_State, varargin{:});
    end
    % End initialization code - DO NOT EDIT
    % --- Executes just before MV is made visible.
    function MV_OpeningFcn(hObject, eventdata, handles, varargin)
    % Opening function of the MV GUI; it returns nothing (see OutputFcn).
    %   hObject    handle to figure
    %   eventdata  reserved - to be defined in a future version of MATLAB
    %   handles    structure with handles and user data (see GUIDATA)
    %   varargin   command line arguments to MV (see VARARGIN)

    % Record the figure handle as the default command line output of MV.
    updatedHandles = handles;
    updatedHandles.output = hObject;

    % Store the updated handles structure with the figure.
    guidata(hObject, updatedHandles);

    % UIWAIT makes MV wait for user response (see UIRESUME)
    % uiwait(handles.figure1);
    % --- Outputs from this function are returned to the command line.
    function varargout = MV_OutputFcn(hObject, eventdata, handles)
    % Output function of the MV GUI.
    %   varargout  cell array for returning output args (see VARARGOUT)
    %   hObject    handle to figure
    %   eventdata  reserved - to be defined in a future version of MATLAB
    %   handles    structure with handles and user data (see GUIDATA)

    % The single output is the default command line output kept in the
    % handles structure.
    varargout = {handles.output};
    % --- Executes on slider movement.
    function slider1_Callback(hObject, eventdata, handles)
    % Redraws both axes for the current slider position. axes1 shows the raw
    % data; axes2 shows the data, its centroid, and the observations whose
    % standardized (z-score) distance from the centroid exceeds the slider
    % value. The slider value is also written to the 'Value' edit control.
    %   hObject    handle to slider1 (see GCBO)
    %   eventdata  reserved - to be defined in a future version of MATLAB
    %   handles    structure with handles and user data (see GUIDATA)
    % Hints: get(hObject,'Value') returns position of slider
    % get(hObject,'Min') and get(hObject,'Max') to determine range of slider

    % Sample data: one observation per row, one variable per column.
    a=[0.0005 0.2300
    0.3203 0.0215
    0.2403 0.1816
    0.0066 0.0421
    0.1091 0.1390
    0.0043 0.0167
    0.7709 0.0011
    0.2230 0.2208
    0.3366 0.0003
    0.3492 0.1406
    0.0168 0.3315
    0.0057 0.0514
    0.5466 0.0000
    0.1934 0.9979
    0.2996 0.0002
    0.0818 0.1906
    0.0431 0.0032
    0.5088 0.0071
    0.2352 0.1619
    0.0071 0.1223
    0.6208 0.0073
    0.0343 0.2519
    0.7166 0.0000
    0.2600 0.1111
    0.0018 0.0284
    0.8881 0.0059
    0.1800 0.0016
    0.0273 0.0067
    0.2173 0.0047
    0.0736 0.0001
    0.6168 0.0005
    0.1390 0.4156
    0.1200 0.0925
    0.0098 0.2469
    0.9363 0.0298
    0.0422 0.0024
    0.1237 0.2900
    0.6201 0.0005
    0.5890 0.0267
    0.0075 0.0976];

    % Draw into each axes by passing it as the parent instead of switching
    % the current axes with axes(...).
    scatter(handles.axes1,a(:,1),a(:,2),'fill');
    scatter(handles.axes2,a(:,1),a(:,2),'fill');

    % Current slider position, used as the z-score threshold.
    valb=get(hObject,'Value');

    % Centroid of the data (mean of each column).
    ms=[mean(a(:,1)),mean(a(:,2))];
    hold(handles.axes2,'on');
    plot(handles.axes2,ms(1),ms(2),'g+','linewidth',10);

    % Euclidean distance of every observation from the centroid, computed
    % for all rows at once (row vector).
    distance_from_mean=hypot(a(:,1)-ms(1),a(:,2)-ms(2)).';

    % Standardize the distances and compare the z-scores (not the raw
    % distances) against the threshold.
    msa=zscore(distance_from_mean);
    thres=valb;
    index_position=find(msa>thres);
    plot(handles.axes2,a(index_position,1),a(index_position,2),'ro','linewidth',5);

    % Show the threshold in the edit control.
    bs=num2str(valb);
    set(handles.Value,'String',bs);

    % Release the hold so the next slider movement redraws axes2 from scratch.
    hold(handles.axes2,'off');
    % --- Executes during object creation, after setting all properties.
    function slider1_CreateFcn(hObject, eventdata, handles)
    % Creation function of slider1.
    %   hObject    handle to slider1 (see GCBO)
    %   eventdata  reserved - to be defined in a future version of MATLAB
    %   handles    empty - handles not created until after all CreateFcns called
    % Hint: slider controls usually have a light gray background.

    % Replace the background with light gray only when the slider still has
    % the default uicontrol background color.
    currentColor = get(hObject,'BackgroundColor');
    defaultColor = get(0,'defaultUicontrolBackgroundColor');
    if isequal(currentColor, defaultColor)
        set(hObject,'BackgroundColor',[.9 .9 .9]);
    end
    function Value_Callback(hObject, eventdata, handles)
    % Callback of the 'Value' edit control. It contains no executable
    % statements, so it performs no action when invoked.
    % hObject handle to Value (see GCBO)
    % eventdata reserved - to be defined in a future version of MATLAB
    % handles structure with handles and user data (see GUIDATA)
    % Hints: get(hObject,'String') returns contents of Value as text
    % str2double(get(hObject,'String')) returns contents of Value as a double
    % --- Executes during object creation, after setting all properties.
    function Value_CreateFcn(hObject, eventdata, handles)
    % Creation function of the 'Value' edit control.
    %   hObject    handle to Value (see GCBO)
    %   eventdata  reserved - to be defined in a future version of MATLAB
    %   handles    empty - handles not created until after all CreateFcns called
    % Hint: edit controls usually have a white background on Windows.
    % See ISPC and COMPUTER.

    % Nothing to adjust on platforms other than Windows.
    if ~ispc
        return;
    end

    % On Windows, switch to a white background only when the control still
    % has the default uicontrol background color.
    currentColor = get(hObject,'BackgroundColor');
    defaultColor = get(0,'defaultUicontrolBackgroundColor');
    if isequal(currentColor, defaultColor)
        set(hObject,'BackgroundColor','white');
    end

    • @djchuache
      @djchuache 2 года назад

      I have a question about this procedure: where do you account for the covariance of the data points? As we can see in your example, the standard deviation of the data is not the same along both axes, so points that are not true outliers may be wrongly rejected. I mean, if you do the same with your first example, we can see visually that the tails of the data will be rejected, not only the isolated outlier in the scatter plot. How do you handle this issue? Or maybe I am wrong. Thank you in advance.