以文本方式查看主题

-  中文XML论坛 - 专业的XML技术讨论区  (http://bbs.xml.org.cn/index.asp)
--  『 Dot NET,C#,ASP,VB 』  (http://bbs.xml.org.cn/list.asp?boardid=43)
----  在给定数组中找第K大的数,线性时间,C#实现,200万数据找中位数6.6分钟[原创]  (http://bbs.xml.org.cn/dispbbs.asp?boardid=43&rootid=&id=35391)


--  作者:pennyliang
--  发布时间:7/8/2006 11:21:00 PM

--  在给定数组中找第K大的数,线性时间,C#实现,200万数据找中位数6.6分钟[原创]
using System;

namespace Algorithm.Test
{
 public class ComparableObject:IComparable
 {
  int m_value;
  public int Value
  {
   get{return m_value;}
  }
  public ComparableObject(int value)
  {
   m_value = value;
  }
  public static implicit operator int(ComparableObject m)
  {
   return m.Value;
  }

  #region IComparable 成员

  public int CompareTo(object obj)
  {
   
   ComparableObject testObj = obj as ComparableObject;
   if(null!=testObj)
   {
    if(testObj.Value>this.Value)
    {
     return -1;
    }
    else if(testObj.Value == this.Value)
    {
     return 0;
    }
    else
    {
     return 1;
    }
   }
   else
   {
    throw new Exception(string.Format("object type is not testCase"));
   }
  }
  

  #endregion
 }
}

using System;
using System.Collections;
using NUnit.Framework;
using Algorithm.Test;

namespace Algorithm.Select
{
 /// <summary>
 /// Kth_Samllest_Selection 的摘要说明。
 /// </summary>
 public class Kth_Samllest_Selection
 {
  ArrayList m_al;
  ArrayList m_MedianAl;
  ArrayList m_MedianAlAl;
  ArrayList m_S1;
  ArrayList m_S2;

  int m_K;
  public Kth_Samllest_Selection(ArrayList al,int K)
  {
   m_al = al;
   m_K = K;
   m_MedianAl = new ArrayList(al.Count/5+1);
   m_MedianAlAl = new ArrayList(al.Count/5+1);
   m_S1 = new ArrayList(al.Count/2);
   m_S2 = new ArrayList(al.Count/2);
  }
  /// <summary>
  /// 执行操作
  /// </summary>
  /// <returns>第K个数的大小</returns>
  public int Do()
  {
   
   if(m_al.Count<=5)
   {
    m_al.Sort();
    return ((ComparableObject)m_al[m_K-1]).Value;
   }
   while(m_al.Count%5!=0)
   {
    m_al.Add(new ComparableObject(int.MaxValue));
   }
   int k = m_al.Count/5;
   for(int i=0;i<k;i++)
   {
    int start = i*5;
    
    m_MedianAlAl.Add(MakeMedianNumMiddle((IComparable)m_al[start],(IComparable)m_al[start+1],(IComparable)m_al[start+2],(IComparable)m_al[start+3],(IComparable)m_al[start+4]));
    m_MedianAl.Add((MakeMedianNumMiddle((IComparable)m_al[start],(IComparable)m_al[start+1],(IComparable)m_al[start+2],(IComparable)m_al[start+3],(IComparable)m_al[start+4]))[2]);

   }
   Kth_Samllest_Selection selector = new Kth_Samllest_Selection(m_MedianAl,m_MedianAl.Count/2);
   int tmp = selector.Do();
   m_MedianAl.Clear();
   m_MedianAl = null;
   for(int i=0;i<m_MedianAlAl.Count;i++)
   {
    ArrayList al = m_MedianAlAl[i] as ArrayList;
    if(((ComparableObject)al[2]).Value <tmp)
    {
     m_S1.Add(al[0]);
     m_S1.Add(al[1]);
     m_S1.Add(al[2]);
     if(((ComparableObject)al[3]).Value<tmp)
     {
      m_S1.Add(al[3]);
     }
     else
     {
      m_S2.Add(al[3]);
     }
     if(((ComparableObject)al[4]).Value<tmp)
     {
      m_S1.Add(al[4]);
     }
     else
     {
      m_S2.Add(al[4]);
     }
     al.Clear();
     al = null;
    }
    else
    {
     m_S2.Add(al[2]);
     m_S2.Add(al[3]);
     m_S2.Add(al[4]);
     if(((ComparableObject)al[0]).Value>tmp)
     {
      m_S2.Add(al[0]);
     }
     else
     {
      m_S1.Add(al[0]);
     }
     if(((ComparableObject)al[1]).Value>tmp)
     {
      m_S2.Add(al[1]);
     }
     else
     {
      m_S1.Add(al[1]);
     }
     al.Clear();
     al = null;
    }
   }
   m_MedianAlAl.Clear();
   m_MedianAlAl = null;
   m_al.Clear();
   m_al = null;
   ///System.Threading.Thread.Sleep(5);
   if(m_S1.Count+1==m_K)
   {
    return tmp;
   }
   else if(m_K<=m_S1.Count)
   {
    
    Kth_Samllest_Selection selector1 = new Kth_Samllest_Selection(m_S1,m_K);
    m_S2.Clear();
    m_S2 = null;
    return selector1.Do();
   }
   else
   {
    
    
    Kth_Samllest_Selection selector2 = new Kth_Samllest_Selection(m_S2,m_K-m_S1.Count);
    m_S1.Clear();
    m_S1 = null;
    return selector2.Do();
   }
  }
  //最多比较7次可以在5个数中找到中位数
  public ArrayList MakeMedianNumMiddle(IComparable num1,IComparable num2,IComparable num3,IComparable num4,IComparable num5)
  {
   ArrayList retAl = new ArrayList(5);
   //-------------------------------------------------------------------
   if(num2.CompareTo(num1)<0)//num1 存放小数
   {
    Exchange(ref num1,ref num2);
   }
   if(num4.CompareTo(num3)<0)//num3 存放小数
   {
    Exchange(ref num3,ref num4);
   }
   //       1       3       5
   //      /       /
   //     2       4
   //---------------------------------------------------------------------
   if(num3.CompareTo(num1)<0)//num1 存放小数
   {
    Exchange(ref num1,ref num3);
    Exchange(ref num2,ref num4);
   }
   //--------------------before here compare three times
   //--------------------after here compare at most four times
   //          1      5
   //        /   \
   //      2       3
   //            /
   //           4
   //---------------------------------------------------------------------
   if(num5.CompareTo(num3)<0)
   {
    //       1      5
    //      /  \   /
    //     2     3
    //            \
    //              4
    //--------------------------------------------------------------------
    #region compare tree times 12354
    if(num3.CompareTo(num2)<0)
    {
     #region compare two times 15234
     //        1       5
     //       /  \   /
     //     2      3
     //             \
     //               4
     //--------------------------------------------------------------------
     if(num3.CompareTo(num2)<0)
     {
      //       1      5
      //        \    /
      //          3
      //        /   \
      //      2       4
      //--------------------------------------------------------------------
      retAl.Add(num1);
      retAl.Add(num5);
      retAl.Add(num3);
      retAl.Add(num2);
      retAl.Add(num4);
     }
     else
     {
      //      1      
      //       \   
      //        2   5
      //         \ /
      //          3
      //           \
      //            4
      //--------------------------------------------------------------------
      #region compare once 12534
      if(num2.CompareTo(num5)<0)
      {
       //      1      
       //       \   
       //        2  
       //         \
       //          5
       //           \
       //            3
       //             \
       //              4
       //--------------------------------------------------------------------
       retAl.Add(num1);
       retAl.Add(num2);
       retAl.Add(num5);
       retAl.Add(num3);
       retAl.Add(num4);
      }
      else
      {
       //      1      
       //       \   
       //        5  
       //         \
       //          2
       //           \
       //            3
       //             \
       //              4
       //--------------------------------------------------------------------
       retAl.Add(num1);
       retAl.Add(num5);
       retAl.Add(num2);
       retAl.Add(num3);
       retAl.Add(num4);
      }
      #endregion
     }
     #endregion
    }
    else
    {
     //     1
     //      \
     //       2    5
     //        \  /
     //         3
     //          \
     //           4
     //--------------------------------------------------------------------
     #region compare once 15234
     if(num5.CompareTo(num2)<0)
     {
      //     1
      //      \
      //       5 
      //        \
      //         2
      //          \    
      //           3
      //            \
      //             4
      //--------------------------------------------------------------------
      retAl.Add(num1);
      retAl.Add(num5);
      retAl.Add(num2);
      retAl.Add(num3);
      retAl.Add(num4);
     }
     else
     {
      //     1
      //      \
      //       2 
      //        \
      //         5
      //          \    
      //           3
      //            \
      //             4
      //--------------------------------------------------------------------
      retAl.Add(num1);
      retAl.Add(num2);
      retAl.Add(num5);
      retAl.Add(num3);
      retAl.Add(num4);
     }
     #endregion
    }
    #endregion
   }
   else
   {
    //        1
    //       /  \
    //      2    3
    //          /  \
    //        4      5
    //--------------------------------------------------------------------
    #region compare three times 12345
    if(num2.CompareTo(num3)<0)
    {
     //      1
     //       \
     //        2
     //         \
     //          3
     //        /   \
     //       4     5
     //--------------------------------------------------------------------
     retAl.Add(num1);
     retAl.Add(num2);
     retAl.Add(num3);
     retAl.Add(num4);
     retAl.Add(num5);
    }
    else
    {
     #region compare two times 13245
     //          1
     //          |
     //          3
     //        / | \
     //       2  4  5
     if(num4.CompareTo(num2)<0)
     {
      Exchange(ref num2,ref num4);
     }
     //         1
     //         |
     //         3
     //       /   \
     //      2     5
     //     /
     //    4
     if(num2.CompareTo(num5)<0)
     {
      //         1
      //         |
      //         3
      //        /   
      //       2    
      //      /  \
      //     4     5
      retAl.Add(num1);
      retAl.Add(num3);
      retAl.Add(num2);
      retAl.Add(num4);
      retAl.Add(num5);
     }
     else
     {
      //  1
      //  |
      //  3
      //  |
      //  5
      //  |
      //  2
      //  |
      //  4
      retAl.Add(num1);
      retAl.Add(num3);
      retAl.Add(num5);
      retAl.Add(num2);
      retAl.Add(num4);
     }
     #endregion
    }
    #endregion
   }
   return retAl;
  }
  private void Exchange(ref IComparable obj1,ref IComparable obj2)
  {
   if(obj1.GetType().Equals(obj2.GetType()))
   {
    IComparable tmp = obj2;
    obj2 = obj1;
    obj1 =tmp;
   }
   else
   {
    throw new Exception(string.Format("obj1 type {0} does not equal obj2 type {2}",obj1.GetType(),obj2.GetType()));
   }
  }
 
 }
 #region Unit Test
 [TestFixture]
 [Category("Select")]
 public class TestKth_Samllest_Selection
 {
  ArrayList m_array1;

  public TestKth_Samllest_Selection()
  {
   m_array1 = null;
   
  }
  [TestFixtureSetUp]
  public void Init()
  {
   m_array1 = new ArrayList(2000000);
   for(int i=0;i<2000000;++i)
   {
    m_array1.Add(new ComparableObject(new System.Random().Next(2000000)));
   }
   //for(int i=0;i<10000;++i)
   //{
   // m_array1.Add(new ComparableObject(new System.Random().Next(100,1000)));
   //}
  }
  [Test]
  public void test()
  {
   long tmp = DateTime.Now.Ticks;
   Kth_Samllest_Selection selector = new Kth_Samllest_Selection(m_array1,1000000);
   Console.WriteLine(selector.Do());
   Console.WriteLine(DateTime.Now.Ticks-tmp);
   //ArrayList ret = selector.MakeMedianNumMiddle(new ComparableObject(5),
   //           new ComparableObject(4),
   //           new ComparableObject(2),
   //           new ComparableObject(3),
   //           new ComparableObject(1));
   //foreach(ComparableObject item in ret)
   //{
   // Console.WriteLine(item);
   //}
   Console.ReadLine();
          
  }
  public static void Main()
  {
   TestKth_Samllest_Selection tester = new TestKth_Samllest_Selection();
   tester.Init();
   tester.test();
  }
 }
 #endregion
}


--  作者:pennyliang
--  发布时间:7/8/2006 11:26:00 PM

--  
参见 计算机算法-设计与分析导论 第三版 Sara Basse,Alen Van Gelder
--  作者:pennyliang
--  发布时间:7/9/2006 8:02:00 PM

--  
改进了一下,耗时减少一半,还可以继续优化,如果把
MedianAl的开销省掉,使用m_al作in-place的运算,另外使用.net 2.0可以节约大量装箱,拆箱的开销,估计时间降到1分左右。

using System;
using System.Collections;
using NUnit.Framework;
using Algorithm.Test;

namespace Algorithm.Select
{
 /// <summary>
 /// Kth_Samllest_Selection 的摘要说明。
 /// </summary>
 public class Kth_Samllest_Selection
 {
  ArrayList m_al;
  ArrayList m_MedianAl;
  ArrayList m_S1;
  ArrayList m_S2;

  int m_K;
  public Kth_Samllest_Selection(ArrayList al,int K)
  {
   m_al = al;
   m_K = K;
   m_MedianAl = new ArrayList(al.Count/5+1);
   m_S1 = new ArrayList(al.Count/2);
   m_S2 = new ArrayList(al.Count/2);
  }
  /// <summary>
  /// 执行操作
  /// </summary>
  /// <returns>第K个数的大小</returns>
  public int Do()
  {
   
   if(m_al.Count<=5)
   {
    m_al.Sort();
    return ((ComparableObject)m_al[m_K-1]).Value;
   }
   while(m_al.Count%5!=0)
   {
    m_al.Add(new ComparableObject(int.MaxValue));
   }
   int k = m_al.Count/5;
   for(int i=0;i<k;i++)
   {
    int start = i*5;
    int num1 = ((ComparableObject)m_al[start]).Value;
    int num2 = ((ComparableObject)m_al[start+1]).Value;
    int num3 = ((ComparableObject)m_al[start+2]).Value;
    int num4 = ((ComparableObject)m_al[start+3]).Value;
    int num5 = ((ComparableObject)m_al[start+4]).Value;
    
    MakeMedianNumMiddle(ref num1,ref num2,ref num3,ref num4,ref num5);

    ((ComparableObject)m_al[start]).Value = num1;
    ((ComparableObject)m_al[start+1]).Value= num2;
    ((ComparableObject)m_al[start+2]).Value= num3;
    ((ComparableObject)m_al[start+3]).Value= num4;
    ((ComparableObject)m_al[start+4]).Value= num5;
    m_MedianAl.Add((ComparableObject)num3);

    //m_MedianAl.Add((MakeMedianNumMiddle(ref (ComparableObject)m_al[start],ref (ComparableObject)m_al[start+1],ref (ComparableObject)m_al[start+2],ref (ComparableObject)m_al[start+3],ref (ComparableObject)m_al[start+4]))[2]);

   }
   Kth_Samllest_Selection selector = new Kth_Samllest_Selection(m_MedianAl,m_MedianAl.Count/2);
   int tmp = selector.Do();
   m_MedianAl.Clear();
   m_MedianAl = null;
   for(int i=0;i<m_al.Count/5;i++)
   {
    int start = i*5;
    if(((ComparableObject)m_al[start+2]).Value <tmp)
    {
     m_S1.Add((ComparableObject)m_al[start+0]);
     m_S1.Add(m_al[start+1]);
     m_S1.Add(m_al[start+2]);
     if(((ComparableObject)m_al[start+3]).Value<tmp)
     {
      m_S1.Add(m_al[start+3]);
     }
     else
     {
      m_S2.Add(m_al[start+3]);
     }
     if(((ComparableObject)m_al[start+4]).Value<tmp)
     {
      m_S1.Add(m_al[start+4]);
     }
     else
     {
      m_S2.Add(m_al[start+4]);
     }
     
    }
    else
    {
     m_S2.Add(m_al[start+2]);
     m_S2.Add(m_al[start+3]);
     m_S2.Add(m_al[start+4]);
     if(((ComparableObject)m_al[start]).Value>tmp)
     {
      m_S2.Add(m_al[start]);
     }
     else
     {
      m_S1.Add(m_al[start+0]);
     }
     if(((ComparableObject)m_al[start+1]).Value>tmp)
     {
      m_S2.Add(m_al[start+1]);
     }
     else
     {
      m_S1.Add(m_al[start+1]);
     }
     
    }
   }
   
   ///System.Threading.Thread.Sleep(5);
   if(m_S1.Count+1==m_K)
   {
    return tmp;
   }
   else if(m_K<=m_S1.Count)
   {
    
    Kth_Samllest_Selection selector1 = new Kth_Samllest_Selection(m_S1,m_K);
    m_S2.Clear();
    m_S2 = null;
    int ret = selector1.Do();
    selector1 = null;
    return ret;
   }
   else
   {
    
    
    Kth_Samllest_Selection selector2 = new Kth_Samllest_Selection(m_S2,m_K-m_S1.Count);
    m_S1.Clear();
    m_S1 = null;
    int ret = selector2.Do();
    selector2 = null;
    return ret;
   }
  }
  //最多比较6次可以在5个数中找到中位数
  public void MakeMedianNumMiddle(ref int num1,ref int num2,ref int num3,ref int num4,ref int num5)
  {
   //-------------------------------------------------------------------
   if(num2.CompareTo(num1)<0)//num1 存放小数
   {
    Exchange(ref num1,ref num2);
   }
   if(num4.CompareTo(num3)<0)//num3 存放小数
   {
    Exchange(ref num3,ref num4);
   }
   //       1       3       5
   //      /       /
   //     2       4
   //---------------------------------------------------------------------
   if(num3.CompareTo(num1)<0)//num1 存放小数
   {
    Exchange(ref num1,ref num3);
    Exchange(ref num2,ref num4);
   }
   //--------------------before here compare three times
   //--------------------after here compare at most four times
   //          1      5
   //        /   \
   //      2       3
   //            /
   //           4
   //---------------------------------------------------------------------
   if(num2.CompareTo(num5)<0)
   {
    #region compare two times 12354
    //       1     
    //      /  \   
    //     2     3
    //    /       \
    //   5          4
    //--------------------------------------------------------------------
    
    if(num2.CompareTo(num3)<0)
    {
     #region compare once 15234
     //       1     
     //      /     
     //     2   
     //    /  \     
     //   5    3
     //         \
     //          4
     //--------------------------------------------------------------------
     if(num5.CompareTo(num3)<0)
     {
      //      1
      //      |
      //      2
      //      |
      //      5
      //      |
      //      3
      //      |
      //      4
      Exchange(ref num3,ref num5);
     }
     else
     {
      //      1      
      //       \   
      //        2   
      //         \
      //          3
      //         / \
      //        5   4
      //--------------------------------------------------------------------
     }
     #endregion
    }
    else
    {
     #region compare once 15234
     //       1     
     //      /     
     //     3   
     //    /  \     
     //   4    2
     //         \
     //          5
     //--------------------------------------------------------------------
     
     if(num2.CompareTo(num4)<0)
     {
      //     1
      //      \
      //       3 
      //        \
      //         2
      //        /  \    
      //       4    5
      //--------------------------------------------------------------------
      Exchange(ref num3,ref num2);
     }
     else
     {
      //     1
      //      \
      //       3 
      //        \
      //         4
      //          \    
      //           2
      //            \
      //             5
      //--------------------------------------------------------------------
      Exchange(ref num3,ref num4);
      Exchange(ref num4,ref num2);
     }
     #endregion
    }
    #endregion
   }
   else
   {
    #region compare two times 15432
    //    5     1
    //     \  /  \
    //       2    3
    //            /  
    //          4      
    //--------------------------------------------------------------------
    
    if(num5.CompareTo(num3)<0)
    {
     #region compare once 12345
     //    5  1
     //    |\/|
     //    2  3
     //       |  
     //       4      
     //--------------------------------------------------------------------
     if(num2.CompareTo(num3)<0)
     {
      //    5  1
      //    | /
      //    2
      //     \
      //  3
      //       |  
      //       4      
      //--------------------------------------------------------------------
      Exchange(ref num3,ref num2);
      Exchange(ref num2,ref num5);
     }
     else
     {
      //     5 1
      //      \|
      //       3
      //      /|  
      //     2 4      
      //--------------------------------------------------------------------
      Exchange(ref num2,ref num5);

     }
     #endregion
    }
    else
    {
     #region compare once 13245
     //      1      
     //      |
     //  3   
     //     /  \   
     //    4    5
     //          \
     //           2  
     if(num4.CompareTo(num5)<0)
     {
      //      1      
      //      |
      //  3   
      //      |   
      //      4
      //      |
      //      5
      //      |
      //      2  
      Exchange(ref num3,ref num4);
      Exchange(ref num2,ref num4);
     }
     else
     {
      //      1      
      //      |
      //  3   
      //      |   
      //      5
      //     / \
      //    4   2
      //      
      //
      Exchange(ref num3,ref num5);
      Exchange(ref num5,ref num2);
     }
     #endregion
    }
    #endregion
    
   }
  }
  private void Exchange(ref int obj1,ref int obj2)
  {
    int tmp = obj2;
    obj2 = obj1;
    obj1 =tmp;
  }
 
 }
 #region Unit Test
 [TestFixture]
 [Category("Select")]
 public class TestKth_Samllest_Selection
 {
  ArrayList m_array1;

  public TestKth_Samllest_Selection()
  {
   m_array1 = null;
   
  }
  [TestFixtureSetUp]
  public void Init()
  {
   m_array1 = new ArrayList(2000001);
   for(int i=0;i<2000000;++i)
   {
    m_array1.Add(new ComparableObject(new System.Random().Next(2000000)));
   }

//   for(int i=0;i<25;++i)
//   {
//    m_array1.Add(new ComparableObject(i+1));
//   }
//   m_array1.Add(new ComparableObject(1));
//   m_array1.Add(new ComparableObject(20));
//   m_array1.Add(new ComparableObject(3));
//   m_array1.Add(new ComparableObject(14));
//   m_array1.Add(new ComparableObject(25));
//   m_array1.Add(new ComparableObject(6));
//   m_array1.Add(new ComparableObject(10));
//   m_array1.Add(new ComparableObject(8));
//   m_array1.Add(new ComparableObject(9));
//   m_array1.Add(new ComparableObject(7));
//   m_array1.Add(new ComparableObject(11));
//   m_array1.Add(new ComparableObject(12));
//   m_array1.Add(new ComparableObject(24));
//   m_array1.Add(new ComparableObject(4));
//   m_array1.Add(new ComparableObject(15));
//   m_array1.Add(new ComparableObject(16));
//   m_array1.Add(new ComparableObject(17));
//   m_array1.Add(new ComparableObject(18));
//   m_array1.Add(new ComparableObject(19));
//   m_array1.Add(new ComparableObject(2));
//   m_array1.Add(new ComparableObject(21));
//   m_array1.Add(new ComparableObject(22));
//   m_array1.Add(new ComparableObject(23));
//   m_array1.Add(new ComparableObject(13));
//   m_array1.Add(new ComparableObject(5));
//   for(int i=0;i<10000;++i)
//   {
//    m_array1.Add(new ComparableObject(new System.Random().Next(100,1000)));
//   }
  }
  [Test]
  public void test()
  {
   long tmp = DateTime.Now.Ticks;
   Kth_Samllest_Selection selector = new Kth_Samllest_Selection(m_array1,1000000);
   Console.WriteLine(selector.Do());
   Console.WriteLine((DateTime.Now.Ticks-tmp)/6000000.0);
   //ArrayList ret = selector.MakeMedianNumMiddle(new ComparableObject(5),
   //           new ComparableObject(4),
   //           new ComparableObject(2),
   //           new ComparableObject(3),
   //           new ComparableObject(1));
   //foreach(ComparableObject item in ret)
   //{
   // Console.WriteLine(item);
   //}
   Console.ReadLine();
          
  }
  public static void Main()
  {
   TestKth_Samllest_Selection tester = new TestKth_Samllest_Selection();
   tester.Init();
   tester.test();
  }
 }
 #endregion
}

[此贴子已经被作者于2006-7-11 19:23:44编辑过]

--  作者:bellebirdie
--  发布时间:8/24/2007 10:14:00 AM

--  
真牛~
W 3 C h i n a ( since 2003 ) 旗 下 站 点
苏ICP备05006046号《全国人大常委会关于维护互联网安全的决定》《计算机信息网络国际联网安全保护管理办法》
171.875ms