Integration of audio and skeleton

  • Question

  • Hey guys,

    Does anyone have an idea how to call a method, triggered by an audio command, that requires skeleton data?

    I used a SynchronizationContext to share data between the audio and skeleton threads, but it's not working.

    I want to call the lift_box method when I say "start"; it should run until the specified coordinates (the parameters) are matched, then call put_box and return.

    What happens is that lift_box gets called, but background noise makes the recognizer match other words from the command set, so it doesn't work out.
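
    One thing that might help with the noise problem is to ignore results the engine itself is not confident about, via e.Result.Confidence. A minimal sketch (0.7 is an assumed threshold to tune for the room and mic):

        //possible noise filter: drop low-confidence results before dispatching commands
        void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            if (e.Result.Confidence < 0.7f)   //assumed threshold - tune per environment
            {
                Debug.WriteLine("Ignored low-confidence word=" + e.Result.Text);
                return;
            }
            //...dispatch "start"/"next" on m_context as before...
        }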

    Sorry for my bad writing.


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Windows;
    using System.Windows.Controls;
    using System.Windows.Data;
    using System.Windows.Documents;
    using System.Windows.Input;
    using System.Windows.Media;
    using System.Windows.Media.Imaging;
    using System.Windows.Navigation;
    using System.Windows.Shapes;
    using Microsoft.Research.Kinect.Nui;
    //using Coding4Fun.Kinect.Wpf;
     
    using Microsoft.Research.Kinect.Audio;
    using Microsoft.Speech.AudioFormat;
    using Microsoft.Speech.Recognition;
     
    using System.Speech.Synthesis;
    using System.Diagnostics;//Stopwatch,Debug.WriteLine
    using System.Threading;//SynchronizationContext
    using System.IO;
     
    namespace skeleton_audio
    {
        /// <summary>
        /// Interaction logic for MainWindow.xaml
        /// </summary>
        public partial class MainWindow : Window
        {
            public MainWindow()
            {
                InitializeComponent();
            }
            Runtime nui = Runtime.Kinects[0];
            int totalFrames = 0;
            int lastFrames = 0;
            DateTime lastTime = DateTime.MaxValue;
     
            // Audio
     
            private System.Threading.Thread m_audit_thread;
            SynchronizationContext m_context;                           //UI-thread context for marshalling audio events onto the skeleton/UI thread
            private const string RecognizerId = "SR_MS_en-US_Kinect_10.0";  //for selecting kinect as audio device
            private SpeechSynthesizer synthesizer;
            private bool m_bUseSpeechRecog = true;
            private KinectAudioSource source;
            private RecognizerInfo ri;
            private SpeechRecognitionEngine sre;        
            private Stream s;
            Choices command_set;             //for specifying command set
            GrammarBuilder gb;
            Grammar g;
            int flag = 1;
            SkeletonFrame lastframe;
     
            // We want to control how depth data gets converted into false-color data
            // for more intuitive visualization, so we keep 32-bit color frame buffer versions of
            // these, to be updated whenever we receive and process a 16-bit frame.
            const int RED_IDX = 2;
            const int GREEN_IDX = 1;
            const int BLUE_IDX = 0;
            byte[] depthFrame32 = new byte[320 * 240 * 4];
            
            //for assigning a colour to each joint
            Dictionary<JointID, Brush> jointColors = new Dictionary<JointID, Brush>() { 
                {JointID.HipCenter, new SolidColorBrush(Color.FromRgb(169, 176, 155))},
                {JointID.Spine, new SolidColorBrush(Color.FromRgb(169, 176, 155))},
                {JointID.ShoulderCenter, new SolidColorBrush(Color.FromRgb(168, 230, 29))},
                {JointID.Head, new SolidColorBrush(Color.FromRgb(200, 0,   0))},
                {JointID.ShoulderLeft, new SolidColorBrush(Color.FromRgb(79,  84,  33))},
                {JointID.ElbowLeft, new SolidColorBrush(Color.FromRgb(84,  33,  42))},
                {JointID.WristLeft, new SolidColorBrush(Color.FromRgb(255, 126, 0))},
                {JointID.HandLeft, new SolidColorBrush(Color.FromRgb(215,  86, 0))},
                {JointID.ShoulderRight, new SolidColorBrush(Color.FromRgb(33,  79,  84))},
                {JointID.ElbowRight, new SolidColorBrush(Color.FromRgb(33,  33,  84))},
                {JointID.WristRight, new SolidColorBrush(Color.FromRgb(77,  109, 243))},
                {JointID.HandRight, new SolidColorBrush(Color.FromRgb(37,   69, 243))},
                {JointID.HipLeft, new SolidColorBrush(Color.FromRgb(77,  109, 243))},
                {JointID.KneeLeft, new SolidColorBrush(Color.FromRgb(69,  33,  84))},
                {JointID.AnkleLeft, new SolidColorBrush(Color.FromRgb(229, 170, 122))},
                {JointID.FootLeft, new SolidColorBrush(Color.FromRgb(255, 126, 0))},
                {JointID.HipRight, new SolidColorBrush(Color.FromRgb(181, 165, 213))},
                {JointID.KneeRight, new SolidColorBrush(Color.FromRgb(71, 222,  76))},
                {JointID.AnkleRight, new SolidColorBrush(Color.FromRgb(245, 228, 156))},
                {JointID.FootRight, new SolidColorBrush(Color.FromRgb(77,  109, 243))}
            };
     
            //actual program execution begins here
            private void Window_Loaded(object sender, RoutedEventArgs e)
            {
                nui.VideoFrameReady += new EventHandler<ImageFrameReadyEventArgs>(nui_VideoFrameReady);
                nui.SkeletonFrameReady += new EventHandler<SkeletonFrameReadyEventArgs>(nui_SkeletonFrameReady);
                nui.DepthFrameReady += new EventHandler<ImageFrameReadyEventArgs>(nui_DepthFrameReady);
     
                
                //initialise the runtime with colour, skeletal-tracking and depth + player-index streams
                nui.Initialize(RuntimeOptions.UseColor | RuntimeOptions.UseSkeletalTracking | RuntimeOptions.UseDepthAndPlayerIndex);
     
                lastTime = DateTime.Now;
     
                nui.VideoStream.Open(ImageStreamType.Video, 2, ImageResolution.Resolution640x480, ImageType.Color);
                nui.DepthStream.Open(ImageStreamType.Depth, 2, ImageResolution.Resolution320x240, ImageType.DepthAndPlayerIndex);
     
                //capture the UI thread's context; speech events will arrive on the audio thread
                m_context = SynchronizationContext.Current;
     
                //m_audit_thread = new System.Threading.Thread(new System.Threading.ThreadStart(hawwa));
                m_audit_thread = new System.Threading.Thread(new System.Threading.ThreadStart(AudioInit));
                m_audit_thread.Start();             //audio synchronization
              //  synthesizer = new SpeechSynthesizer();
                //synthesizer.SelectVoice("Microsoft Anna");
            }
     
            //to convert 16-bit depth data into 32-bit colour (with per-player RGB colouring)
            //each 16-bit pixel: upper 13 bits = depth, lower 3 bits = player index
            //the depth value is scaled down to an 8-bit intensity for display
            byte[] convertDepthFrame(byte[] depthFrame16)
            {
                for (int i16 = 0, i32 = 0; i16 < depthFrame16.Length && i32 < depthFrame32.Length; i16 += 2, i32 += 4)
                {
                    int player = depthFrame16[i16] & 0x07;
                    int realDepth = (depthFrame16[i16 + 1] << 5) | (depthFrame16[i16] >> 3);
                    // transform 13-bit depth information into an 8-bit intensity appropriate
                    // for display (we disregard information in most significant bit)
                    byte intensity = (byte)(255 - (255 * realDepth / 0x0fff));
     
                    depthFrame32[i32 + RED_IDX] = 0;
                    depthFrame32[i32 + GREEN_IDX] = 0;
                    depthFrame32[i32 + BLUE_IDX] = 0;
     
                    // choose different display colors based on player
                    switch (player)
                    {
                        case 0:
                            depthFrame32[i32 + RED_IDX] = (byte)(intensity / 2);
                            depthFrame32[i32 + GREEN_IDX] = (byte)(intensity / 2);
                            depthFrame32[i32 + BLUE_IDX] = (byte)(intensity / 2);
                            break;
                        case 1:
                            depthFrame32[i32 + RED_IDX] = intensity;
                            break;
                        case 2:
                            depthFrame32[i32 + GREEN_IDX] = intensity;
                            break;
                        case 3:
                            depthFrame32[i32 + RED_IDX] = (byte)(intensity / 4);
                            depthFrame32[i32 + GREEN_IDX] = (byte)(intensity);
                            depthFrame32[i32 + BLUE_IDX] = (byte)(intensity);
                            break;
                        case 4:
                            depthFrame32[i32 + RED_IDX] = (byte)(intensity);
                            depthFrame32[i32 + GREEN_IDX] = (byte)(intensity);
                            depthFrame32[i32 + BLUE_IDX] = (byte)(intensity / 4);
                            break;
                        case 5:
                            depthFrame32[i32 + RED_IDX] = (byte)(intensity);
                            depthFrame32[i32 + GREEN_IDX] = (byte)(intensity / 4);
                            depthFrame32[i32 + BLUE_IDX] = (byte)(intensity);
                            break;
                        case 6:
                            depthFrame32[i32 + RED_IDX] = (byte)(intensity / 2);
                            depthFrame32[i32 + GREEN_IDX] = (byte)(intensity / 2);
                            depthFrame32[i32 + BLUE_IDX] = (byte)(intensity);
                            break;
                        case 7:
                            depthFrame32[i32 + RED_IDX] = (byte)(255 - intensity);
                            depthFrame32[i32 + GREEN_IDX] = (byte)(255 - intensity);
                            depthFrame32[i32 + BLUE_IDX] = (byte)(255 - intensity);
                            break;
                    }
                }
                return depthFrame32;
            }
     
            //provides depth frames for displaying
            void nui_DepthFrameReady(object sender, ImageFrameReadyEventArgs e)
            {
                PlanarImage Image = e.ImageFrame.Image;
                byte[] convertedDepthFrame = convertDepthFrame(Image.Bits);     //for coloring various players
     
                image2.Source = BitmapSource.Create(
                    Image.Width, Image.Height, 96, 96, PixelFormats.Bgr32, null, convertedDepthFrame, Image.Width * 4);
     
                //for calculating frame-rate
                ++totalFrames;
                DateTime cur = DateTime.Now;
                if (cur.Subtract(lastTime) > TimeSpan.FromSeconds(1))
                {
                    int frameDiff = totalFrames - lastFrames;
                    lastFrames = totalFrames;
                    lastTime = cur;
                 //   textbox1.Text = frameDiff.ToString() + " fps";
                }
            }
     
            
            //for displaying skeleton
            void nui_SkeletonFrameReady(object sender, SkeletonFrameReadyEventArgs e)
            {
                SkeletonFrame skeletonFrame = e.SkeletonFrame;
                lastframe = e.SkeletonFrame;
                int iSkeleton = 0;
                
                //for colouring 5 parts of body like left hand,right hand, left leg, right leg, head to hip-centre
                Brush[] brushes = new Brush[6];
                brushes[0] = new SolidColorBrush(Color.FromRgb(255, 0, 0));
                brushes[1] = new SolidColorBrush(Color.FromRgb(0, 255, 0));
                brushes[2] = new SolidColorBrush(Color.FromRgb(64, 255, 255));
                brushes[3] = new SolidColorBrush(Color.FromRgb(255, 255, 64));
                brushes[4] = new SolidColorBrush(Color.FromRgb(255, 64, 255));
                brushes[5] = new SolidColorBrush(Color.FromRgb(128, 128, 255));
     
                skeleton.Children.Clear();
                foreach (SkeletonData data in skeletonFrame.Skeletons)
                {
                    if (SkeletonTrackingState.Tracked == data.TrackingState)
                    {
                        // Draw bones
                        Brush brush = brushes[iSkeleton % brushes.Length];
                        skeleton.Children.Add(getBodySegment(data.Joints, brush, JointID.HipCenter, JointID.Spine, JointID.ShoulderCenter, JointID.Head));
                        skeleton.Children.Add(getBodySegment(data.Joints, brush, JointID.ShoulderCenter, JointID.ShoulderLeft, JointID.ElbowLeft, JointID.WristLeft, JointID.HandLeft));
                        skeleton.Children.Add(getBodySegment(data.Joints, brush, JointID.ShoulderCenter, JointID.ShoulderRight, JointID.ElbowRight, JointID.WristRight, JointID.HandRight));
                        skeleton.Children.Add(getBodySegment(data.Joints, brush, JointID.HipCenter, JointID.HipLeft, JointID.KneeLeft, JointID.AnkleLeft, JointID.FootLeft));
                        skeleton.Children.Add(getBodySegment(data.Joints, brush, JointID.HipCenter, JointID.HipRight, JointID.KneeRight, JointID.AnkleRight, JointID.FootRight));
     
                        // Draw joints
                        foreach (Joint joint in data.Joints)
                        {
                            Point jointPos = getDisplayPosition(joint);
                            Line jointLine = new Line();
                            jointLine.X1 = jointPos.X - 3;
                            jointLine.X2 = jointLine.X1 + 6;
                            jointLine.Y1 = jointLine.Y2 = jointPos.Y;
                            jointLine.Stroke = jointColors[joint.ID];
                            jointLine.StrokeThickness = 6;
                            skeleton.Children.Add(jointLine);
                        }
                    }
                    iSkeleton++;
                } // for each skeleton
            }
     
            //for displaying video data
            void nui_VideoFrameReady(object sender, ImageFrameReadyEventArgs e)
            {
                PlanarImage img = e.ImageFrame.Image;
                image1.Source = BitmapSource.Create(img.Width, img.Height, 96, 96, PixelFormats.Bgr32, null, img.Bits, img.Width * img.BytesPerPixel);
                
            }
     
            //converts a skeleton joint's real-world coordinates first into depth-image space and then into colour-image space
            private Point getDisplayPosition(Joint joint)
            {
                float depthX, depthY;
                nui.SkeletonEngine.SkeletonToDepthImage(joint.Position, out depthX, out depthY);
                depthX = depthX * 320; //convert to 320, 240 space
                depthY = depthY * 240; //convert to 320, 240 space
                int colorX, colorY;
                ImageViewArea iv = new ImageViewArea();
                // only ImageResolution.Resolution640x480 is supported at this point
                nui.NuiCamera.GetColorPixelCoordinatesFromDepthPixel(ImageResolution.Resolution640x480, iv, (int)depthX, (int)depthY, (short)0, out colorX, out colorY);
     
                // map back to skeleton.Width & skeleton.Height
                return new Point((int)(skeleton.Width * colorX / 640.0), (int)(skeleton.Height * colorY / 480.0));
            }
     
            Polyline getBodySegment(Microsoft.Research.Kinect.Nui.JointsCollection joints, Brush brush, params JointID[] ids)
            {
                //to display X co-ordinate of right hand from kinect sensor
                float x = joints[JointID.HandRight].Position.X;
                textbox1.Text = x.ToString();
     
                //to display Y co-ordinate of right hand from kinect sensor
                float y = joints[JointID.HandRight].Position.Y;
                textbox2.Text = y.ToString();
     
                //to display Z co-ordinate of right hand from kinect sensor
                float z = joints[JointID.HandRight].Position.Z;
                if (z < 0.8)
                {
                    textbox3.Text = "less dist.";
                }
                else
                {
                    textbox3.Text = z.ToString();
                }
     
                
        //      lift_box(x, z, -0.6f, -0.2f, 2.0f, 2.4f);
     
                //to connect all joints
                PointCollection points = new PointCollection(ids.Length);
                for (int i = 0; i < ids.Length; ++i)
                {
                    points.Add(getDisplayPosition(joints[ids[i]]));
                }
     
                Polyline polyline = new Polyline();
                polyline.Points = points;
                polyline.Stroke = brush;
                polyline.StrokeThickness = 5;
                return polyline;
            }
     
    
            // Lift Box Method
            float x, z;

            //guides the user's right hand into the pick-up region, then hands off to put_box
            public void lift_box(float xmin, float xmax, float zmin, float zmax)
            {
                if (lastframe == null)      //no skeleton frame received yet
                    return;

                foreach (SkeletonData data in lastframe.Skeletons)
                {
                    if (SkeletonTrackingState.Tracked == data.TrackingState)
                    {
                        Microsoft.Research.Kinect.Nui.JointsCollection joints = data.Joints;
                        x = joints[JointID.HandRight].Position.X;
                        z = joints[JointID.HandRight].Position.Z;
                        if (flag == 1)
                        {
                            Speak("rohan");

                            if ((z > zmax || z < zmin) || (x > xmax || x < xmin))
                            {
                                if (x < xmin)
                                    Speak("Move Right");
                                else if (x > xmax)
                                    Speak("Move Left");
                                else if (z < zmin)
                                    Speak("Move Away from Kinect");
                                else if (z > zmax)
                                    Speak("Move Toward Kinect");
                            }
                            else //hand is inside the pick-up region
                            {
                                Speak("Lift Box");
                                flag = 0;
                                put_box(x, z, 1.2f, 1.6f, 3.2f, 3.6f);
                            }
                        }
                        else
                        {
                            if (flag == 2)
                            {
                                return;
                            }
                            Speak("sachin");
                            put_box(x, z, 1.2f, 1.6f, 3.2f, 3.6f);
                        }
                    }
                }
            }
    
    
            //Put Box Method
            //guides the hand into the drop-off region once the box has been "lifted"
            public void put_box(float x, float z, float xmin, float xmax, float zmin, float zmax)
            {
                
                if (flag == 0)
                {
                    if ((z > zmax || z < zmin) || (x > xmax || x < xmin))
                    {
                        if (x < xmin)
                            Speak("Move Right");
                        else if (x > xmax)
                            Speak("Move Left"); 
                        else if (z < zmin)
                            Speak("Move Away from Kinect");
                        else if (z > zmax)
                            Speak("Move Toward Kinect");
                    }
                    else //hand is inside the drop-off region
                    {
                        
                        Speak("Put Box");
                        flag = 2;
                        return;
                    }
                }
            }
    
     
            private void Window_Closed(object sender, EventArgs e)
            {
                nui.Uninitialize();
            }
     
                
       
    
            // AudioCommands speech recognition
            public void AudioInit()
            {
                source = new KinectAudioSource();
                
                synthesizer = new SpeechSynthesizer();
                synthesizer.SelectVoice("Microsoft Anna");
                //synthesizer.SelectVoice("Microsoft Server Speech Text to Speech Voice (en-US, Helen)");
     
     
                source.FeatureMode = true;
                source.AutomaticGainControl = false; //Important to turn this off for speech recognition
                source.SystemMode = SystemMode.OptibeamArrayOnly;//No AEC for this sample
     
                ri = SpeechRecognitionEngine.InstalledRecognizers().Where(r => r.Id == RecognizerId).FirstOrDefault();
     
                if (ri == null)
                {
                    return;
                }
     
                sre = new SpeechRecognitionEngine(ri.Id);
     
                command_set = new Choices();         //to specify the command set
     
                command_set.Add("start");
                command_set.Add("next");
                
     
                gb = new GrammarBuilder();
                gb.Culture = ri.Culture;
                gb.Append(command_set);
     
     
                // Create the actual Grammar instance, and then load it into the speech recognizer.
                g = new Grammar(gb);
     
                sre.LoadGrammar(g);
                sre.SpeechRecognized += SreSpeechRecognized;
                sre.SpeechHypothesized += SreSpeechHypothesized;
                sre.SpeechRecognitionRejected += SreSpeechRecognitionRejected;
     
                //start capturing from the Kinect mic array; the recognizer reads
                //the stream as 16 kHz, 16-bit mono PCM (set below)
                s = source.Start();
     
                sre.SetInputToAudioStream(s,
                                          new SpeechAudioFormatInfo(
                                              EncodingFormat.Pcm, 16000, 16, 1,
                                              32000, 2, null));
     
             
                sre.RecognizeAsync(RecognizeMode.Multiple);
                Speak("I am ready");
            }
     
            //speaks the given text if the synthesizer is idle (used for all voice prompts)
            public void Speak(string textToSpeak)
            {
                if (m_bUseSpeechRecog == false)
                {
                    return;
                }
     
                //if the synthesizer is ready (i.e. not already talking), speak the text
                if (synthesizer.State == SynthesizerState.Ready)
                {
                    synthesizer.SpeakAsync(textToSpeak);
                }
            }
     
            
            void SreSpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
            {
                Debug.WriteLine("Hyp word=" + e.Result.Text);
            }
     
            
            void SreSpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
            {
                if (e.Result != null)
                {
                    Debug.WriteLine("reject word=" + e.Result.Text);
                    //DumpRecordedAudio(e.Result.Audio);
                }
            }
     
            
            
            void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
            {
                Debug.WriteLine("Rec word=" + e.Result.Text);
     
                
                m_context.Send(delegate
                {
                    switch (e.Result.Text)
                    {
                        case "start":
                            textbox4.Text = "START";
                            lift_box(-0.6f, -0.2f, 2.0f, 2.4f);
                            break;
                        case "next":
                            textbox4.Text = "STOP";
                            break;
                    }
                }, null);
            }
             
        }
    }
    


     

    Saturday, December 17, 2011 3:34 AM

Answers

  • Yeah, I was a bit confused.

    I was actually trying to call the lift_box method when the command START is recognised and a skeleton is tracked... but it's resolved now.
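
    The gating ended up looking roughly like this: a hypothetical m_skeletonTracked flag set from nui_SkeletonFrameReady and checked before dispatching the command (a sketch against the beta-SDK types above, not the exact code):

        //sketch: only dispatch "start" when a skeleton is actually being tracked
        private volatile bool m_skeletonTracked;   //assumed flag, refreshed on every skeleton frame

        void nui_SkeletonFrameReady(object sender, SkeletonFrameReadyEventArgs e)
        {
            bool tracked = false;
            foreach (SkeletonData data in e.SkeletonFrame.Skeletons)
            {
                if (data.TrackingState == SkeletonTrackingState.Tracked)
                    tracked = true;
            }
            m_skeletonTracked = tracked;
            //...existing drawing code...
        }

        void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            if (e.Result.Text == "start" && m_skeletonTracked)
            {
                //marshal onto the UI thread, as in the original handler
                m_context.Send(delegate { lift_box(-0.6f, -0.2f, 2.0f, 2.4f); }, null);
            }
        }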


    • Marked as answer by rOHaN_sADaLE Thursday, December 22, 2011 3:08 AM
    Wednesday, December 21, 2011 3:49 PM
  • Can you mark your own post as the answer then, so that other people don't get confused by the post and try to answer it?
    Once you eliminate the impossible, whatever remains, no matter how improbable, must be the truth. - "Sherlock Holmes" "Speak softly and carry a big stick" - Theodore Roosevelt. Fear leads to anger, anger leads to hate, hate leads to suffering - Yoda
    • Marked as answer by rOHaN_sADaLE Thursday, December 22, 2011 3:08 AM
    Wednesday, December 21, 2011 9:12 PM

All replies

  • Confused. What are you trying to do, exactly?
    Once you eliminate the impossible, whatever remains, no matter how improbable, must be the truth. - "Sherlock Holmes" "Speak softly and carry a big stick" - Theodore Roosevelt. Fear leads to anger, anger leads to hate, hate leads to suffering - Yoda
    Wednesday, December 21, 2011 2:24 PM
  • Yeah, I was a bit confused.

    I was actually trying to call the lift_box method when the command START is recognised and a skeleton is tracked... but it's resolved now.


    • Marked as answer by rOHaN_sADaLE Thursday, December 22, 2011 3:08 AM
    Wednesday, December 21, 2011 3:49 PM
  • Can you mark your own post as the answer then, so that other people don't get confused by the post and try to answer it?
    Once you eliminate the impossible, whatever remains, no matter how improbable, must be the truth. - "Sherlock Holmes" "Speak softly and carry a big stick" - Theodore Roosevelt. Fear leads to anger, anger leads to hate, hate leads to suffering - Yoda
    • Marked as answer by rOHaN_sADaLE Thursday, December 22, 2011 3:08 AM
    Wednesday, December 21, 2011 9:12 PM