A Google-like Full Text Search

  • Hi,

    I am using your code to search for a word in a SQL Server table.

    I have one table, candidates, with lakhs of records in it.

    I am searching the word in the c_resume_text field.

    contains(c_resume_text,'C++') - It shows C++ results and also results other than C++.

    contains(c_resume_text,'C++11') - It shows results.

    contains(c_resume_text,'C++1') - It shows zero results.

    In the c_resume_text field there is no word c++11, but we do have the word c++.

    In the 3rd query above (the 2nd query with one 1 removed), why does it not show any results?

    And why does the 2nd query show results?

    Do I need to change any code here?

    Please help me....

    /// <summary>
    /// Runs a full-text search over <c>candidates.c_resume_text</c> and returns the
    /// matching rows (row number, id, first name, last name) as a <see cref="DataTable"/>.
    /// </summary>
    /// <param name="ftsQuery">
    /// Full-text search condition passed to <c>CONTAINS</c> as the <c>@ftsQuery</c>
    /// parameter (sent as NVARCHAR(4000)); it is never concatenated into the SQL text.
    /// </param>
    /// <returns>A table filled with the matching rows, ordered numbering by <c>c_id</c> descending.</returns>
    public static DataTable ExecuteQuery(string ftsQuery)
    {
        DataTable dt = new DataTable();

        try
        {
            // The adapter is disposed on both the success and the failure path.
            using (SqlDataAdapter da = new SqlDataAdapter(
                "SELECT ROW_NUMBER() OVER (ORDER BY c_id desc) AS Number, " +
                " c_id, " +
                // The comma after c_first_name is required: without it SQL treats
                // c_last_name as a column alias for c_first_name.
                " c_first_name, " +
                " c_last_name " +
                "FROM candidates " +
                "WHERE CONTAINS(c_resume_text, @ftsQuery);",
                connectionString))
            {
                da.SelectCommand.Parameters.Add("@ftsQuery", SqlDbType.NVarChar, 4000).Value = ftsQuery;
                da.Fill(dt);
            }
        }
        catch
        {
            // The partially filled table is useless to the caller; release it and
            // rethrow with "throw;" so the original stack trace is preserved.
            dt.Dispose();
            throw;
        }

        return dt;
    }

    /// <summary>
    /// Builds the search-query grammar: OR expressions over AND expressions over
    /// primary expressions (terms, phrases, thesaurus/exact-prefixed items,
    /// parenthesized sub-queries and &lt;proximity lists&gt;).
    /// </summary>
    public SearchGrammar()
    {
        // ---- Terminals ----
        var term = new IdentifierTerminal("Term", "!@#$%^*_'.?", "!@#$%^*_'.?0123456789");

        // Not essential, but giving Term the lowest priority lets the scanner try the
        // "or" / "and" operator symbols declared in the rules first, so those words
        // come out as operator tokens rather than Terms.
        term.Priority = Terminal.LowestPriority;

        var phrase = new StringLiteral("Phrase");

        // ---- Non-terminals ----
        var orExpression = new NonTerminal("OrExpression");
        var orOperator = new NonTerminal("OrOperator");
        var andExpression = new NonTerminal("AndExpression");
        var andOperator = new NonTerminal("AndOperator");
        var excludeOperator = new NonTerminal("ExcludeOperator");
        var primaryExpression = new NonTerminal("PrimaryExpression");
        var thesaurusExpression = new NonTerminal("ThesaurusExpression");
        var thesaurusOperator = new NonTerminal("ThesaurusOperator");
        var exactOperator = new NonTerminal("ExactOperator");
        var exactExpression = new NonTerminal("ExactExpression");
        var parenthesizedExpression = new NonTerminal("ParenthesizedExpression");
        var proximityExpression = new NonTerminal("ProximityExpression");
        var proximityList = new NonTerminal("ProximityList");

        // The whole query is an OR expression.
        Root = orExpression;

        // ---- Rules ----
        orExpression.Rule =
              andExpression
            | orExpression + orOperator + andExpression;
        orOperator.Rule = Symbol("or") | "|";

        andExpression.Rule =
              primaryExpression
            | andExpression + andOperator + primaryExpression;

        // An empty operator means two adjacent expressions are implicitly AND-ed.
        andOperator.Rule =
              Empty
            | "and"
            | "&"
            | excludeOperator;
        excludeOperator.Rule = Symbol("-");

        primaryExpression.Rule =
              term
            | thesaurusExpression
            | exactExpression
            | parenthesizedExpression
            | phrase
            | proximityExpression;

        thesaurusExpression.Rule = thesaurusOperator + term;
        thesaurusOperator.Rule = Symbol("~");

        exactExpression.Rule =
              exactOperator + term
            | exactOperator + phrase;
        exactOperator.Rule = Symbol("+");

        parenthesizedExpression.Rule = "(" + orExpression + ")";

        proximityExpression.Rule = "<" + proximityList + ">";
        MakePlusRule(proximityList, term);

        // Brackets only delimit structure, so they are registered as punctuation.
        RegisterPunctuation("<", ">", "(", ")");
    }

    /// <summary>
    /// Recursively converts a parsed search-query AST node into the text of a
    /// SQL Server full-text (CONTAINS) search condition.
    /// </summary>
    /// <param name="node">AST node to convert; the conversion is chosen by <c>node.Term.Name</c>.</param>
    /// <param name="type">How bare <c>Term</c> tokens are rendered (see the "Term" case).</param>
    /// <returns>The search-condition fragment for <paramref name="node"/> and its children.</returns>
    /// <exception cref="ApplicationException">The node's term name is not one of the handled cases.</exception>
    public static string ConvertQuery(AstNode node, TermType type)
    {
        string result = "";

        // Note that some NonTerminals don't actually get into the AST tree because
        // of Irony's optimizations - punctuation stripping and node bubbling. For
        // example, for ParenthesizedExpression the parentheses are stripped off as
        // punctuation and the child expression node replaces the parent node
        // (the child is "bubbled up").
        switch (node.Term.Name)
        {
            case "OrExpression":
                // Children: [0] left operand, [1] operator, [2] right operand.
                result = "(" + ConvertQuery(node.ChildNodes[0], type) + " OR " +
                    ConvertQuery(node.ChildNodes[2], type) + ")";
                break;

            case "AndExpression":
                // The middle child is the operator; "-" means "exclude the right operand".
                string opName = node.ChildNodes[1].Term.Name;
                string andop;
                if (opName == "-")
                {
                    andop = " AND NOT ";
                }
                else
                {
                    andop = " AND ";
                    type = TermType.Inflectional;
                }
                result = "(" + ConvertQuery(node.ChildNodes[0], type) + andop +
                    ConvertQuery(node.ChildNodes[2], type) + ")";
                break;

            case "PrimaryExpression":
                result = "(" + ConvertQuery(node.ChildNodes[0], type) + ")";
                break;

            case "ProximityList":
                // Convert every term of the list individually (as exact terms) and
                // join them with NEAR. Each element must be indexed: the array slot
                // tmp[i] receives the conversion of child i.
                string[] tmp = new string[node.ChildNodes.Count];
                for (int i = 0; i < node.ChildNodes.Count; i++)
                {
                    tmp[i] = ConvertQuery(node.ChildNodes[i], TermType.Exact);
                }
                result = "(" + string.Join(" NEAR ", tmp) + ")";
                break;

            case "Phrase":
                result = "\"" + ((Token)node).ValueString + "\"";
                break;

            case "ThesaurusExpression":
                // Children: [0] the "~" operator, [1] the term.
                result = " FORMSOF (THESAURUS, " +
                    ((Token)node.ChildNodes[1]).ValueString + ") ";
                break;

            case "ExactExpression":
                // Children: [0] the "+" operator, [1] the term or phrase.
                result = " \"" + ((Token)node.ChildNodes[1]).ValueString + "\" ";
                break;

            case "Term":
                switch (type)
                {
                    case TermType.Inflectional:
                        result = ((Token)node).ValueString;
                        if (result.EndsWith("*", StringComparison.Ordinal))
                        {
                            // Prefix search: the term is quoted, not expanded.
                            result = "\"" + result + "\"";
                        }
                        else
                        {
                            result = " FORMSOF (INFLECTIONAL, " + result + ") ";
                        }
                        break;

                    case TermType.Exact:
                        result = ((Token)node).ValueString;
                        break;

                    // Any other TermType (e.g. Thesaurus) leaves result as "".
                }
                break;

            // This should never happen, even if input string is garbage
            default:
                throw new ApplicationException("Converter failed: unexpected term: " +
                    node.Term.Name + ". Please investigate.");
        }

        return result;
    }

    /// <summary>
    /// Selects how ConvertQuery renders a bare Term token.
    /// </summary>
    public enum TermType
    {
        /// <summary>Term is wrapped in FORMSOF (INFLECTIONAL, ...), or quoted when it ends with '*'.</summary>
        Inflectional = 1,

        /// <summary>Thesaurus rendering (numeric value 2).</summary>
        Thesaurus,

        /// <summary>Term text is emitted as-is (numeric value 3).</summary>
        Exact
    }

    Thanks,

  • Hi,

    I am using your code to search for a word in a SQL Server table.

    I have one table, candidates, with lakhs of records in it.

    I am searching the word in the c_resume_text field.

    contains(c_resume_text,'C++') - It shows C++ results and also results other than C++.

    contains(c_resume_text,'C++11') - It shows results.

    contains(c_resume_text,'C++1') - It shows zero results.

    In the c_resume_text field there is no word c++11, but we do have the word c++.

    In the 3rd query above (the 2nd query with one 1 removed), why does it not show any results?

    And why does the 2nd query show results?

    Do I need to change any code here?

    Please help me....

    /// <summary>
    /// Runs a full-text search over <c>candidates.c_resume_text</c> and returns the
    /// matching rows (row number, id, first name, last name) as a <see cref="DataTable"/>.
    /// </summary>
    /// <param name="ftsQuery">
    /// Full-text search condition passed to <c>CONTAINS</c> as the <c>@ftsQuery</c>
    /// parameter (sent as NVARCHAR(4000)); it is never concatenated into the SQL text.
    /// </param>
    /// <returns>A table filled with the matching rows, ordered numbering by <c>c_id</c> descending.</returns>
    public static DataTable ExecuteQuery(string ftsQuery)
    {
        DataTable dt = new DataTable();

        try
        {
            // The adapter is disposed on both the success and the failure path.
            using (SqlDataAdapter da = new SqlDataAdapter(
                "SELECT ROW_NUMBER() OVER (ORDER BY c_id desc) AS Number, " +
                " c_id, " +
                // The comma after c_first_name is required: without it SQL treats
                // c_last_name as a column alias for c_first_name.
                " c_first_name, " +
                " c_last_name " +
                "FROM candidates " +
                "WHERE CONTAINS(c_resume_text, @ftsQuery);",
                connectionString))
            {
                da.SelectCommand.Parameters.Add("@ftsQuery", SqlDbType.NVarChar, 4000).Value = ftsQuery;
                da.Fill(dt);
            }
        }
        catch
        {
            // The partially filled table is useless to the caller; release it and
            // rethrow with "throw;" so the original stack trace is preserved.
            dt.Dispose();
            throw;
        }

        return dt;
    }

    /// <summary>
    /// Builds the search-query grammar: OR expressions over AND expressions over
    /// primary expressions (terms, phrases, thesaurus/exact-prefixed items,
    /// parenthesized sub-queries and &lt;proximity lists&gt;).
    /// </summary>
    public SearchGrammar()
    {
        // ---- Terminals ----
        var term = new IdentifierTerminal("Term", "!@#$%^*_'.?", "!@#$%^*_'.?0123456789");

        // Not essential, but giving Term the lowest priority lets the scanner try the
        // "or" / "and" operator symbols declared in the rules first, so those words
        // come out as operator tokens rather than Terms.
        term.Priority = Terminal.LowestPriority;

        var phrase = new StringLiteral("Phrase");

        // ---- Non-terminals ----
        var orExpression = new NonTerminal("OrExpression");
        var orOperator = new NonTerminal("OrOperator");
        var andExpression = new NonTerminal("AndExpression");
        var andOperator = new NonTerminal("AndOperator");
        var excludeOperator = new NonTerminal("ExcludeOperator");
        var primaryExpression = new NonTerminal("PrimaryExpression");
        var thesaurusExpression = new NonTerminal("ThesaurusExpression");
        var thesaurusOperator = new NonTerminal("ThesaurusOperator");
        var exactOperator = new NonTerminal("ExactOperator");
        var exactExpression = new NonTerminal("ExactExpression");
        var parenthesizedExpression = new NonTerminal("ParenthesizedExpression");
        var proximityExpression = new NonTerminal("ProximityExpression");
        var proximityList = new NonTerminal("ProximityList");

        // The whole query is an OR expression.
        Root = orExpression;

        // ---- Rules ----
        orExpression.Rule =
              andExpression
            | orExpression + orOperator + andExpression;
        orOperator.Rule = Symbol("or") | "|";

        andExpression.Rule =
              primaryExpression
            | andExpression + andOperator + primaryExpression;

        // An empty operator means two adjacent expressions are implicitly AND-ed.
        andOperator.Rule =
              Empty
            | "and"
            | "&"
            | excludeOperator;
        excludeOperator.Rule = Symbol("-");

        primaryExpression.Rule =
              term
            | thesaurusExpression
            | exactExpression
            | parenthesizedExpression
            | phrase
            | proximityExpression;

        thesaurusExpression.Rule = thesaurusOperator + term;
        thesaurusOperator.Rule = Symbol("~");

        exactExpression.Rule =
              exactOperator + term
            | exactOperator + phrase;
        exactOperator.Rule = Symbol("+");

        parenthesizedExpression.Rule = "(" + orExpression + ")";

        proximityExpression.Rule = "<" + proximityList + ">";
        MakePlusRule(proximityList, term);

        // Brackets only delimit structure, so they are registered as punctuation.
        RegisterPunctuation("<", ">", "(", ")");
    }

    /// <summary>
    /// Recursively converts a parsed search-query AST node into the text of a
    /// SQL Server full-text (CONTAINS) search condition.
    /// </summary>
    /// <param name="node">AST node to convert; the conversion is chosen by <c>node.Term.Name</c>.</param>
    /// <param name="type">How bare <c>Term</c> tokens are rendered (see the "Term" case).</param>
    /// <returns>The search-condition fragment for <paramref name="node"/> and its children.</returns>
    /// <exception cref="ApplicationException">The node's term name is not one of the handled cases.</exception>
    public static string ConvertQuery(AstNode node, TermType type)
    {
        string result = "";

        // Note that some NonTerminals don't actually get into the AST tree because
        // of Irony's optimizations - punctuation stripping and node bubbling. For
        // example, for ParenthesizedExpression the parentheses are stripped off as
        // punctuation and the child expression node replaces the parent node
        // (the child is "bubbled up").
        switch (node.Term.Name)
        {
            case "OrExpression":
                // Children: [0] left operand, [1] operator, [2] right operand.
                result = "(" + ConvertQuery(node.ChildNodes[0], type) + " OR " +
                    ConvertQuery(node.ChildNodes[2], type) + ")";
                break;

            case "AndExpression":
                // The middle child is the operator; "-" means "exclude the right operand".
                string opName = node.ChildNodes[1].Term.Name;
                string andop;
                if (opName == "-")
                {
                    andop = " AND NOT ";
                }
                else
                {
                    andop = " AND ";
                    type = TermType.Inflectional;
                }
                result = "(" + ConvertQuery(node.ChildNodes[0], type) + andop +
                    ConvertQuery(node.ChildNodes[2], type) + ")";
                break;

            case "PrimaryExpression":
                result = "(" + ConvertQuery(node.ChildNodes[0], type) + ")";
                break;

            case "ProximityList":
                // Convert every term of the list individually (as exact terms) and
                // join them with NEAR. Each element must be indexed: the array slot
                // tmp[i] receives the conversion of child i.
                string[] tmp = new string[node.ChildNodes.Count];
                for (int i = 0; i < node.ChildNodes.Count; i++)
                {
                    tmp[i] = ConvertQuery(node.ChildNodes[i], TermType.Exact);
                }
                result = "(" + string.Join(" NEAR ", tmp) + ")";
                break;

            case "Phrase":
                result = "\"" + ((Token)node).ValueString + "\"";
                break;

            case "ThesaurusExpression":
                // Children: [0] the "~" operator, [1] the term.
                result = " FORMSOF (THESAURUS, " +
                    ((Token)node.ChildNodes[1]).ValueString + ") ";
                break;

            case "ExactExpression":
                // Children: [0] the "+" operator, [1] the term or phrase.
                result = " \"" + ((Token)node.ChildNodes[1]).ValueString + "\" ";
                break;

            case "Term":
                switch (type)
                {
                    case TermType.Inflectional:
                        result = ((Token)node).ValueString;
                        if (result.EndsWith("*", StringComparison.Ordinal))
                        {
                            // Prefix search: the term is quoted, not expanded.
                            result = "\"" + result + "\"";
                        }
                        else
                        {
                            result = " FORMSOF (INFLECTIONAL, " + result + ") ";
                        }
                        break;

                    case TermType.Exact:
                        result = ((Token)node).ValueString;
                        break;

                    // Any other TermType (e.g. Thesaurus) leaves result as "".
                }
                break;

            // This should never happen, even if input string is garbage
            default:
                throw new ApplicationException("Converter failed: unexpected term: " +
                    node.Term.Name + ". Please investigate.");
        }

        return result;
    }

    /// <summary>
    /// Selects how ConvertQuery renders a bare Term token.
    /// </summary>
    public enum TermType
    {
        /// <summary>Term is wrapped in FORMSOF (INFLECTIONAL, ...), or quoted when it ends with '*'.</summary>
        Inflectional = 1,

        /// <summary>Thesaurus rendering (numeric value 2).</summary>
        Thesaurus,

        /// <summary>Term text is emitted as-is (numeric value 3).</summary>
        Exact
    }

    Thanks,

Viewing 2 posts - 166 through 166 (of 166 total)

You must be logged in to reply to this topic. Login to reply