Monday, January 25, 2010

Histogram-based median search

Реализация алгоритма поиска медианы в большом одномерном массиве чисел.
Это измененный вариант известного алгоритма Торбена.
Привожу этот алгоритм здесь, чтобы на него можно было сослаться.
Сам алгоритм приводится без пояснений, пояснения и характеристики производительности позже выложу здесь же в виде статьи.








package anjlab.cubics.aggregate.median;

import java.util.Arrays;

/**
 * Histogram-based implementation of median search algorithm.
 
 * Based on N. Devillard's implementation of Torben Mogensen's algorithm 
 * ({@linkplain http://ndevilla.free.fr/median/median.pdf}).
 
 @author Dmitry Gusev 
 */
public class HistogramBasedMedianFinder {

  private final int K;  //  number of ranges to create in histogram
  private final double epsilon;
  
  public HistogramBasedMedianFinder() {
    this(150.000000001d);
  }
  
  public HistogramBasedMedianFinder(int k, double epsilon) {
    this.K = k;
    this.epsilon = epsilon;
  }
  
  public double search(double[] m) {
    int n = m.length;

    int correction = n % == 0;

    double min, max;

    min = max = m[0];
    for (int i = 1; i < n; i++) {
      if (m[i< minmin = m[i];
      if (m[i> maxmax = m[i];
    }

    long leftOutRange = 0;
    long rightOutRange = 0;

    Histogram histogram = new Histogram();
    
    while (true) {
      histogram.init(min, max);
      
      for (int i = 0; i < n; i++) {
        histogram.add(m[i]);
      }

      long countInRanges = histogram.totalCount - histogram.others;

      if (countInRanges == 1) {
        return histogram.recentlyAdded;
      }

      long incCount = 0;
      
      for (int i = 0; i < histogram.rangeCount; i++)  {
        long rangeCount = histogram.counts[i];
        
        incCount += rangeCount;
        
        long rightOutCurrentRange = (countInRanges + rightOutRange- incCount;
      
        if (incCount + leftOutRange >= rightOutCurrentRange - correction) {
          //  Median is in this range. Repeat search in this range
          min = i == ? histogram.start : histogram.ranges[i - 1];
          max = histogram.ranges[i];
      
          if (almostSame(min, max)) {
            return min;
          }
          
          leftOutRange += (incCount - rangeCount)
          rightOutRange = rightOutCurrentRange;
          
          break;
        }
      }
    }
  }
  
  public boolean almostSame(double a, double b) {
    return Math.abs(a - b< epsilon;
  }
  
  private class Histogram {
    public double start;
    public double end;
    private double step;
    public double[] ranges = new double[K + 1];
    public long[] counts = new long[K + 1];
    public double recentlyAdded;
    public long totalCount;
    public long others;
    public int rangeCount;
    public void init(double start, double end) {
      this.start = start;
      this.end = end;
      Arrays.fill(counts, 0);
      others = 0;
      totalCount = 0;
      if (end < start || almostSame(start, end)) {
        rangeCount = 0;
      else {
        this.step = (end - start/ K;
        int idx = 0;
        for (double i = start; i < end; i += step) {
          double right = i + step;
          if (right > end) {
            right = end;
          }
          ranges[idx++= right;
        }
        rangeCount = idx;
      }

    }
    public void add(double value) {
      totalCount++;

      if (value < start || value > end) {
        others++;
        return;
      }

      recentlyAdded = value;

      //  binary search
      
      int low = 0;
      int high = rangeCount - 1;

      while (low <= high) {
        int mid = (high + low>>> 1;
        if (value <= ranges[mid]) {
          if (mid > && value < ranges[mid - 1]) {
            high = mid - 1;  //  continue search
          else {
            counts[mid]++;  //  we've found the range
            return;
          }
        else if (value > ranges[mid]) {
          low = mid + 1;
        }
      }
    }
  }  
}


Friday, January 15, 2010

Adobe Reader Team!

Сегодня вышло новое обновление Adobe Reader 8.2.0.
Как обычно, тихо и незаметно.
За все время сколько пользуюсь Reader'ом (с 2000 года, как только появился первый компьютер) ни разу не заглядывал в окошко Credits. Сегодня заглянул.

Оказывается в команде Adobe Reader'а почти 1000 человек! (971 если быть точным)

Может я никогда не уделял этому внимания, но по-моему это самая большая команда, про которую я слышал. Точнее я слышал про компании с несколькими десятками/сотнями разработчиков, но как-то в общем, без конкретики.

А тут в окошке Credits расписаны все команды и упомянут каждый член команды лично!



Решил ради интереса всех пересчитать, получилось целых 42 команды!

Между прочим, судя по (с) в окошке About, Reader'у уже 26-й год! (1984-2010)
Мы с ним почти ровесники :)

Желаю им и дальше расти и процветать!

p.s.




Team # of members
Team Engineering 141
Quality Engineering 113
Engineering Management 55
Release Engineering 5
Product Management 14
Marketing and Business Management 22
Acrobat Administration 5
Globalization 16
Prerelease Programs 5
User Interface 14
Additional Engineering 62
Additional Quality Engineering 40
Additional Engineering Management 57
Additional Product, Marketing, and Business Management 9
Core Tech Management 14
Core Tech Adobe Graphics Manager 7
Core Tech Adobe Graphics Manager Printing 7
Core Tech Adobe XML Engine 6
Core Tech Color Management 4
Core Type 8
Core Tech PDF Libraries 10
Core Tech Architecture 12
Core Tech Scripting 6
Core Tech AMT 50
Core Tech AMT Quality Engineering 18
Core Tech Metadata 4
Core Tech Sangam 4
Core Tech Quality Engineering 52
Core Tech Admins 8
Adobe Production Quality Engineering Team 20
Legal 7
Visual Design 6
Adobe Systems Japan 13
Adobe Systems China 4
Type 9
Technical and Customer Support 15
Developer Support 18
Client Services Support 26
Learning Resources 43
Information Technology 12
Packaging and Manufacturing 14
Adobe.com Web Team 16


Total 971

Thursday, December 24, 2009

Пару строчек из серии комментарии в коде

В работе с чужим кодом есть свои прелести :)


public static String RUSSIAN_NO_STRING = "НЕТ"; // russion word HET, do not try to correct this string!


/**
*<P>!!!! Pay attention to similiar (the same) method on PrintHandler.
* This method might be not used at all, there is quite mess about that.
*/

Saturday, November 28, 2009

Best Practice: Implementing Telerik RadGrid with Linq(ToSQL)

Telerik RadGrid is a very powerful component that allows you to view, edit, order and filter data.

I'd like to share my experience using this component with linq to sql.

Using this component is very straightforward unless your entity contains references to other entities (in other words if your table have foreign keys to other tables, see DB diagram below). Very often such relations have a kind of reference data.



In this case you may meet the following difficulties:

  1. ability to display data field of referenced entity (except its ID, of course) in grid column;
  2. ability to sort and filter data set of your entities by columns which are references to other entities.


To understand the cause of these difficulties lets look at simple example.

Generated Linq to SQL model for the database schema above will look like the following:



Now, to display a list of Books in RadGrid you should implement databinding. To do this you usually subscribe to RadGrid's OnNeedDataSource event and invoke RadGrid.DataBind() in PageLoad event handler:


protected void Page_Load(object sender, EventArgs e)
{
if (!IsPostBack)
{
RadGrid1.DataBind();
}
}

protected void RadGrid1_NeedDataSource(object source, GridNeedDataSourceEventArgs e)
{
if (!e.IsFromDetailTable)
{
if (e.RebindReason == GridRebindReason.InitialLoad
|| e.RebindReason == GridRebindReason.ExplicitRebind)
{
RadGrid1.VirtualItemCount = Book.GetBooksCount();
}

int skip = RadGrid1.MasterTableView.CurrentPageIndex * RadGrid1.MasterTableView.PageSize;
int take = RadGrid1.MasterTableView.PageSize;

RadGrid1.DataSource = Book.GetBooks(skip, take);
}
}


Here's our data access methods:


public static int GetBooksCount()
{
using (var ctx = new DataClasses1DataContext())
{
return GetBooksQuery(ctx).Count();
}
}

public static List<Book> GetBooks(int? skip, int? take)
{
using (var ctx = new DataClasses1DataContext())
{
var query = GetBooksQuery(ctx);

if (skip.HasValue)
{
query = query.Skip(skip.Value);
}
if (take.HasValue)
{
query = query.Take(take.Value);
}

return query.ToList();
}
}

private static IQueryable<Book> GetBooksQuery(DataClasses1DataContext ctx)
{
var query = (from book in ctx.Books
select book);

return query;
}


part of *.aspx file that holds RadGrid markup:


<form id="form1" runat="server">
<div>
<telerik:RadScriptManager ID="RadScriptManager1" runat="server"/>
<telerik:RadGrid ID="RadGrid1" runat="server"
OnNeedDataSource="RadGrid1_NeedDataSource"
AllowCustomPaging="True"
AllowPaging="True">
<PagerStyle AlwaysVisible="True" Mode="NextPrevAndNumeric" />
</telerik:RadGrid>
</div>
</form>


and the resulting grid:



Notice how we implemented pagind with just few lines of code.

By default RadGrid created columns and bound them to properties of our Book class (AutoGenerateColumns="True"). But we need to display Pubilsher's Name instead of PublisherID.

To do this we need to change AutoGenerateColumns to False and write RadGrid markup by hand.

Here is the markup:

<Columns>
<telerik:GridBoundColumn UniqueName="Title" HeaderText="Title"
DataField="Title" DataType="System.String" />
<telerik:GridBoundColumn UniqueName="Author" HeaderText="Author"
DataField="Author" DataType="System.String" />
<telerik:GridBoundColumn UniqueName="Publisher.Name" HeaderText="Publisher"
DataField="Publisher.Name" DataType="System.String" />
</Columns>

and code behind:

public partial class Book
{

public static int GetBooksCount()
{
using (var ctx = new DataClasses1DataContext())
{
return GetBooksQuery(ctx).Count();
}
}

public static List<Book> GetBooks(int? skip, int? take)
{
using (var ctx = new DataClasses1DataContext())
{

// Preload Book's Publisher field
var loadOptions = new DataLoadOptions();
loadOptions.LoadWith<Book>(b => b.Publisher);
ctx.LoadOptions = loadOptions;

var query = GetBooksQuery(ctx);

if (skip.HasValue)
{
query = query.Skip(skip.Value);
}
if (take.HasValue)
{
query = query.Take(take.Value);
}

return query.ToList();
}
}

private static IQueryable<Book> GetBooksQuery(DataClasses1DataContext ctx)
{
var query = (from book in ctx.Books
select book);

return query;
}
}

Here's what we got at this point:



Now we will add support for filter and sorting capabilities. To do this we should set RadGrid's AllowFilteringByColumn and AllowSorting to True and change our DAL methods to support query filtering and ordering.

RadGrid gives us very good support here, because it can generate linq string that contains part of linq query's where expression. To get this expression we wrote simple RadGridHelper class (see full source code in attachments below).

To use RadGrid expressions we need DynamicQueriable (CSharpSamples.zip\LinqSamples\DynamicQuery\DynamicQuery\Dynamic.cs) to mix dynamic linq expressions with static queries in DAL.

Finishing stroke is to make filtering case insensitive, in order to do this we should set CaseSensitive="False" in RadGrid's GroupingSettings.

Below are the resulting code linstings:

Resulting RadGrid markup:

<telerik:RadGrid ID="RadGrid1" runat="server"
OnNeedDataSource="RadGrid1_NeedDataSource"
AllowCustomPaging="True"
AllowPaging="True"
AllowFilteringByColumn="True"
AllowSorting="True"
AutoGenerateColumns="False" GridLines="None">
<HeaderContextMenu EnableTheming="True">
<CollapseAnimation Type="OutQuint" Duration="200"></CollapseAnimation>
</HeaderContextMenu>
<PagerStyle AlwaysVisible="True" Mode="NextPrevAndNumeric" />
<GroupingSettings CaseSensitive="False" />
<MasterTableView>
<ExpandCollapseColumn>
<HeaderStyle Width="20px"></HeaderStyle>
</ExpandCollapseColumn>
<Columns>
<telerik:GridBoundColumn UniqueName="Title" HeaderText="Title"
DataField="Title" DataType="System.String" SortExpression="Title" />
<telerik:GridBoundColumn UniqueName="Author" HeaderText="Author"
DataField="Author" DataType="System.String" SortExpression="Author" />
<telerik:GridBoundColumn UniqueName="PublisherName" HeaderText="Publisher"
DataField="Publisher.Name" DataType="System.String" SortExpression="Publisher.Name" />
</Columns>
</MasterTableView>
<FilterMenu EnableTheming="True">
<CollapseAnimation Type="OutQuint" Duration="200"></CollapseAnimation>
</FilterMenu>
</telerik:RadGrid>


Resulting code behind:


protected void Page_Load(object sender, EventArgs e)
{
if (!IsPostBack)
{
RadGrid1.DataBind();
}
}

protected void RadGrid1_NeedDataSource(object source, GridNeedDataSourceEventArgs e)
{
if (!e.IsFromDetailTable)
{
string where = RadGridHelper.GetFilterExpression(RadGrid1.MasterTableView,
null);
string orderBy = RadGridHelper.GetOrderBy(RadGrid1.MasterTableView);

if (e.RebindReason == GridRebindReason.InitialLoad
|| e.RebindReason == GridRebindReason.ExplicitRebind)
{
RadGrid1.VirtualItemCount = Book.GetBooksCount(where);
}

int skip = RadGrid1.MasterTableView.CurrentPageIndex * RadGrid1.MasterTableView.PageSize;
int take = RadGrid1.MasterTableView.PageSize;

RadGrid1.DataSource = Book.GetBooks(where, orderBy, skip, take);
}
}


Final Book class:


public partial class Book
{

public static int GetBooksCount(string where)
{
using (var ctx = new DataClasses1DataContext())
{
return GetBooksQuery(ctx, where, null).Count();
}
}

public static List<Book> GetBooks(string where, string orderBy, int? skip, int? take)
{
using (var ctx = new DataClasses1DataContext())
{
var query = GetBooksQuery(ctx, where, orderBy);

if (skip.HasValue)
{
query = query.Skip(skip.Value);
}
if (take.HasValue)
{
query = query.Take(take.Value);
}

return query.ToList();
}
}

private static IQueryable<BookWrapper> GetBooksQuery(DataClasses1DataContext ctx, string where, string orderBy)
{
var query = (from book in ctx.Books
select book);

if (!string.IsNullOrEmpty(where))
{
query = query.Where(where);
}
if (!string.IsNullOrEmpty(orderBy))
{
query = query.OrderBy(orderBy);
}

return query;
}
}


Resulting RadGrid in action:



Attachments


radgridsample-src.zip Full project sources + DB (142 KB)