Skip to content

Instantly share code, notes, and snippets.

@Jun1sMe
Forked from relyky/AddressParser.cs
Created January 22, 2022 07:57
Show Gist options
  • Select an option

  • Save Jun1sMe/e98ab2f488965a8c90b984529b461274 to your computer and use it in GitHub Desktop.

Select an option

Save Jun1sMe/e98ab2f488965a8c90b984529b461274 to your computer and use it in GitHub Desktop.

Revisions

  1. @relyky relyky revised this gist May 6, 2020. No changes.
  2. @relyky relyky revised this gist Feb 26, 2018. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions RegEx_pattern.txt
    Original file line number Diff line number Diff line change
    @@ -3,5 +3,4 @@
    (?=.*[0-9])(?=.*[A-Z])(?=.*[a-z])(?=.*[!@#$%^&+=])\S{8,}

    // 密碼驗證:必需有數字、大寫英文字元、小寫英文字元、特殊字元,只能填指定字元(英數字與特殊字元)8位以上
    (?=.*[0-9])(?=.*[A-Z])(?=.*[a-z])(?=.*[!@#$%^&+=])[a-zA-Z0-9!@#$%^&+=]{8,}

    (?=.*\d)(?=.*[A-Z])(?=.*[a-z])(?=.*[!@#$%^&+=])[\w!@#$%^&+=]{8,}
  3. @relyky relyky revised this gist Feb 26, 2018. 1 changed file with 7 additions and 0 deletions.
    7 changes: 7 additions & 0 deletions RegEx_pattern.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,7 @@

    // 密碼驗證:必需有數字、大寫英文字元、小寫英文字元、特殊字元,非空白字元8位以上
    (?=.*[0-9])(?=.*[A-Z])(?=.*[a-z])(?=.*[!@#$%^&+=])\S{8,}

    // 密碼驗證:必需有數字、大寫英文字元、小寫英文字元、特殊字元,只能填指定字元(英數字與特殊字元)8位以上
    (?=.*[0-9])(?=.*[A-Z])(?=.*[a-z])(?=.*[!@#$%^&+=])[a-zA-Z0-9!@#$%^&+=]{8,}

  4. @relyky relyky revised this gist Oct 19, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion RegEx_parse_a_sentence.cs
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,7 @@ protected void btn02_Click(object sender, EventArgs e)
    Match m1 = regptn1.Match(str, 0); // 開始比對
    if (!m1.Success)
    {
    txt02ans.Text = "X"; // not match
    txt02ans.Text = "X"; // not match & show
    return;
    }

  5. @relyky relyky revised this gist Oct 19, 2016. 1 changed file with 50 additions and 0 deletions.
    50 changes: 50 additions & 0 deletions RegEx_parse_a_sentence.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@
    ////////////////////////////////////////////////////////////////////
    // 連續使用 Regex.Match 解析一句完整的句子。當然文法是固定的簡單句子。
    ////////////////////////////////////////////////////////////////////

    /// <summary>
    /// Parses the sentence typed into txt02src (expected shape: "This is a BOOK.") by matching
    /// its words one after another, each search resuming just past the previous match.
    /// txt02ans receives a trace of the form "[value:index,length] → [value:index,length] ...";
    /// the trace ends with "X" at the first word that cannot be found.
    /// </summary>
    protected void btn02_Click(object sender, EventArgs e)
    {
        string str = txt02src.Text.Trim();

        // The words of the fixed grammar, in the order they must appear.
        string[] words = { "This", "is", "a", "BOOK" };

        string trace = string.Empty;
        int startAt = 0;

        for (int i = 0; i < words.Length; i++)
        {
            if (i > 0)
            {
                trace += " → ";
            }

            // Regex.Match(input, startat) throws ArgumentOutOfRangeException when startat is
            // greater than input.Length, which happens when the previous word ended the input.
            // Treat that situation as "not found" instead of crashing the handler.
            Match m = startAt <= str.Length
                ? new Regex(words[i]).Match(str, startAt)
                : Match.Empty;

            if (!m.Success)
            {
                txt02ans.Text = trace + "X"; // not matched
                return;
            }

            trace += string.Format("[{0}:{1},{2}]", m.Value, m.Index, m.Length);

            // Resume after the match, skipping the one separator character that follows it.
            startAt = m.Index + m.Length + 1;
        }

        txt02ans.Text = trace;
    }
  6. @relyky relyky revised this gist Oct 19, 2016. No changes.
  7. @relyky relyky revised this gist Oct 19, 2016. 1 changed file with 320 additions and 0 deletions.
    320 changes: 320 additions & 0 deletions RegEx_TextFileParseHelper.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,320 @@
    //////////////////////////////////////////////////////////////////////////////////////////////////////
    // 使用 RegEx 解析文字報表文件檔
    // 如同報表,可先以節段區分出如:RH (report header), PH (page header), DT (detail)
    // 解析單位為一行。
    // 函式類別只有兩種: IsMatch<REPORT_SECTION>, TryParse<REPORT_SECTION>
    //////////////////////////////////////////////////////////////////////////////////////////////////////

    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    namespace TEST
    {
        /// <summary>
        /// Line-oriented regular-expression helper for a fixed-layout text report.
        /// The static IsMatch* methods test whether a line has the layout of one report section
        /// (RH = report header, PH = page header, DT = detail). The TryParse* methods extract
        /// field values from a line into the public fields of this instance.
        /// </summary>
        public class DMR_RptParseHelper
        {
            #region properties

            // detail fields
            public string CustomerID = string.Empty;
            public string ChangeType = string.Empty;
            public string ChangeTypeName = string.Empty;
            public string DataFrom = string.Empty;
            public string DataTo = string.Empty;
            public string VpUserID = string.Empty;

            // header fields
            public string ProcDate = string.Empty;

            #endregion

            #region field patterns

            // Built once and reused by every call instead of being re-created per line.
            private static readonly Regex CustomerIdPattern = new Regex(@"\w+");
            private static readonly Regex ChangeTypePattern = new Regex(@"\d+");
            private static readonly Regex ChangeTypeNamePattern = new Regex(@"\S+(\s\S+)*\s+FROM");
            private static readonly Regex DataFromPattern = new Regex(@"FROM=\s?\S+");
            private static readonly Regex VpUserIdPattern = new Regex(@"[\w.]+");
            private static readonly Regex DataToPattern = new Regex(@"TO=\s?\S+");
            private static readonly Regex ProcDatePattern = new Regex(@"PROC DATE \d\d/\d\d/\d{4}");

            private const string ProcDatePrefix = "PROC DATE ";
            private const string FromKeyword = "FROM";
            private const string FromPrefix = "FROM=";
            private const string ToPrefix = "TO=";

            #endregion

            /// <summary>
            /// Returns all parsed fields in one bracketed, comma-separated line (for tracing).
            /// </summary>
            public override string ToString()
            {
                return string.Format(
                    "Acc[{0}], Chg[{1}], ChgN[{2}], From[{3}], To[{4}], V+ User[{5}], ProcDate[{6}]",
                    this.CustomerID,
                    this.ChangeType,
                    this.ChangeTypeName,
                    this.DataFrom,
                    this.DataTo,
                    this.VpUserID,
                    this.ProcDate);
            }

            /// <summary>
            /// Parses the first detail line: customer ID, change type, change type name,
            /// the "FROM=" value and the V+ user ID, in that order.
            /// </summary>
            /// <param name="lineDT1">The raw detail line.</param>
            /// <returns>
            /// true when all five fields were found. On false, the fields located before the
            /// failure keep their parsed values and the remaining ones are empty.
            /// </returns>
            public bool TryParseDetailLine1(string lineDT1)
            {
                // Start from the "failed" state. DataTo is filled by TryParseDetailLine2 and is
                // deliberately left untouched here.
                this.CustomerID = string.Empty;
                this.ChangeType = string.Empty;
                this.ChangeTypeName = string.Empty;
                this.DataFrom = string.Empty;
                this.VpUserID = string.Empty;

                if (lineDT1 == null)
                {
                    return false;
                }

                // field 1 : Customer ID (leading zeros are dropped)
                Match m1 = MatchFrom(CustomerIdPattern, lineDT1, 0);
                if (!m1.Success)
                {
                    return false;
                }
                this.CustomerID = m1.Value.Trim().TrimStart('0');

                // field 2 : Change Type; search resumes one character past the previous field
                Match m2 = MatchFrom(ChangeTypePattern, lineDT1, m1.Index + m1.Length + 1);
                if (!m2.Success)
                {
                    return false;
                }
                this.ChangeType = m2.Value.Trim();

                // field 3 : Change Type Name; the pattern consumes the trailing "FROM" keyword,
                // which is cut off again before storing the name
                Match m3 = MatchFrom(ChangeTypeNamePattern, lineDT1, m2.Index + m2.Length + 1);
                if (!m3.Success)
                {
                    return false;
                }
                this.ChangeTypeName = m3.Value
                    .Remove(m3.Value.Length - FromKeyword.Length, FromKeyword.Length)
                    .Trim();

                // field 4 : Data From; restart on the "FROM" keyword that ended field 3
                Match m4 = MatchFrom(DataFromPattern, lineDT1, m3.Index + m3.Length - FromKeyword.Length);
                if (!m4.Success)
                {
                    return false;
                }
                this.DataFrom = m4.Value.Substring(FromPrefix.Length).Trim();

                // field 5 : V+ User ID (dots are removed)
                Match m5 = MatchFrom(VpUserIdPattern, lineDT1, m4.Index + m4.Length + 1);
                if (!m5.Success)
                {
                    return false;
                }
                this.VpUserID = m5.Value.Trim().Replace(".", "");

                return true;
            }

            /// <summary>
            /// Parses the second detail line and stores the value following "TO=" in DataTo.
            /// </summary>
            /// <param name="lineDT2">The raw detail line.</param>
            /// <returns>true when a "TO=" value was found; otherwise false and DataTo is empty.</returns>
            public bool TryParseDetailLine2(string lineDT2)
            {
                Match m = lineDT2 == null ? Match.Empty : DataToPattern.Match(lineDT2, 0);
                if (!m.Success)
                {
                    this.DataTo = string.Empty;
                    return false;
                }

                this.DataTo = m.Value.Substring(ToPrefix.Length).Trim();
                return true;
            }

            /// <summary>
            /// Parses the "PROC DATE MM/dd/yyyy" part of report header line 3 and stores it in
            /// ProcDate formatted as yyyy/MM/dd.
            /// </summary>
            /// <param name="lineRH3">The raw report header line.</param>
            /// <returns>true when a valid date was found; otherwise false and ProcDate is empty.</returns>
            public bool TryParseReportHeaderLine3(string lineRH3)
            {
                Match m = lineRH3 == null ? Match.Empty : ProcDatePattern.Match(lineRH3, 0);
                if (!m.Success)
                {
                    this.ProcDate = string.Empty;
                    return false;
                }

                string dateText = m.Value.Substring(ProcDatePrefix.Length);
                DateTime dt;

                // The report always prints month/day/year, so parse with an explicit format and
                // the invariant culture; a culture-sensitive parse would reject or misread
                // "08/27/2016" on machines whose culture uses day/month order.
                bool parsed = DateTime.TryParseExact(
                    dateText,
                    "MM/dd/yyyy",
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None,
                    out dt);

                if (!parsed)
                {
                    this.ProcDate = string.Empty;
                    return false;
                }

                // Invariant culture keeps "/" as the separator whatever the current culture is.
                this.ProcDate = dt.ToString("yyyy/MM/dd", System.Globalization.CultureInfo.InvariantCulture);
                return true;
            }

            /// <summary>Tests for report header line 1. The line is NOT trimmed: 40 or more trailing blanks are required.</summary>
            public static bool IsMatchReportHeadLine1(string line)
            {
                // e.g. "AR000000 - R09 ANZ - TAIWAN " (followed by padding)
                return IsLineMatch(line, @"^((AR000000 - (R02|R05|R09|R59))\s+(ANZ - TAIWAN)\s{40,})$", false);
            }

            /// <summary>Tests for report header line 2 (surrounding blanks are ignored).</summary>
            public static bool IsMatchReportHeadLine2(string line)
            {
                // e.g. "021 - ANZ CUST NAME-ADDRESS MAINTENACE FILE DATE 08/26/2016 PAGE 6 "
                // ("MAINTENACE" is the literal text this pattern matches; do not "correct" it.)
                return IsLineMatch(line, @"^((021 - ANZ)\s+(CUST NAME-ADDRESS MAINTENACE)\s+(FILE DATE )\d\d/\d\d/\d{4}\s+(PAGE)\s+\d+)$", true);
            }

            /// <summary>Tests for report header line 3. The line is NOT trimmed: 50 or more leading blanks are required.</summary>
            public static bool IsMatchReportHeadLine3(string line)
            {
                // e.g. " PROC DATE 08/27/2016 TIME 00.15.59 " (preceded by padding)
                return IsLineMatch(line, @"^(\s{50,}(PROC DATE )\d\d/\d\d/\d{4}\s+(TIME )\d\d\.\d\d\.\d\d\s*)$", false);
            }

            /// <summary>Tests for page header line 1 (surrounding blanks are ignored).</summary>
            public static bool IsMatchPageHeadLine1(string line)
            {
                // e.g. " MAINT *--SECURITY NAME--* "
                return IsLineMatch(line, @"^((MAINT)\s+(\*--SECURITY NAME--\*))$", true);
            }

            /// <summary>Tests for page header line 2 (surrounding blanks are ignored).</summary>
            public static bool IsMatchPageHeadLine2(string line)
            {
                // e.g. " ACCOUNT CODE S DATE TIME "
                return IsLineMatch(line, @"^((ACCOUNT)\s+(CODE)\s+(S)\s+(DATE)\s+(TIME))$", true);
            }

            /// <summary>Tests for page header line 3 (surrounding blanks are ignored).</summary>
            public static bool IsMatchPageHeadLine3(string line)
            {
                // e.g. " NUMBER + OCC FIELD DESCRIPTION **-----------------CHANGE DATA-------------** C STAMP STAMP TERM"
                return IsLineMatch(line, @"^((NUMBER)\s+(\+ OCC)\s+(FIELD DESCRIPTION)\s.+(CHANGE DATA).+(C)\s+(STAMP)\s+(STAMP)\s+(TERM))$", true);
            }

            /// <summary>
            /// Tests for detail line 1 (surrounding blanks are ignored). The pattern is the
            /// whole-line counterpart of the field patterns used by TryParseDetailLine1.
            /// </summary>
            public static bool IsMatchDetailLine1(string line)
            {
                // e.g. "0000000000A122326123 9206 OWNER ADDRESS 1 FROM= 樂利二街962巷35號111樓 CTW7.61 "
                return IsLineMatch(line, @"^((\w+)\s+(\d+)\s+(\S+(\s\S+)*)\s+(FROM=\s?\S+)\s+([\w.]+))$", true);
            }

            /// <summary>Tests for detail line 2. The line is NOT trimmed: 30 or more leading blanks are required.</summary>
            public static bool IsMatchDetailLine2(string line)
            {
                // e.g. " TO= 樂利九街962巷35號111樓 L 082216 155530 GOIJ" (preceded by padding)
                return IsLineMatch(line, @"^(\s{30,}(TO=)(\s?\S+)\s+([A-Z])\s+(\d{6})\s+(\d{6})\s+(\w*)\s*)$", false);
            }

            // Runs pattern.Match(input, startAt), but reports "no match" instead of throwing
            // when startAt lies beyond the end of the input.
            private static Match MatchFrom(Regex pattern, string input, int startAt)
            {
                if (startAt < 0 || startAt > input.Length)
                {
                    return Match.Empty;
                }

                return pattern.Match(input, startAt);
            }

            // Shared body of the IsMatch* methods; a null line never matches.
            private static bool IsLineMatch(string line, string pattern, bool trimFirst)
            {
                if (line == null)
                {
                    return false;
                }

                return Regex.IsMatch(trimFirst ? line.Trim() : line, pattern);
            }
        }
    }

    /////////////////// 應用/測試紀錄 //////////////

    /// <summary>
    /// Test handler for the two detail lines: shows whether each line has the expected layout
    /// ("DT1 Y/N", "DT2 Y/N"), then parses both lines and appends the parsed fields,
    /// prefixed with "FAIL!" when either parse did not succeed.
    /// </summary>
    protected void btnLine_Click(object sender, EventArgs e)
    {
        string detailLine1 = txtLine1.Text;
        string detailLine2 = txtLine2.Text;

        // layout check
        txtLineAns.Text = string.Empty;
        txtLineAns.Text += DMR_RptParseHelper.IsMatchDetailLine1(detailLine1) ? "DT1 Y " : "DT1 N ";
        txtLineAns.Text += DMR_RptParseHelper.IsMatchDetailLine2(detailLine2) ? "DT2 Y " : "DT2 N ";

        // field extraction; both lines are always parsed, even when the first one fails
        DMR_RptParseHelper parsed = new DMR_RptParseHelper();
        bool line1Ok = parsed.TryParseDetailLine1(detailLine1);
        bool line2Ok = parsed.TryParseDetailLine2(detailLine2);

        string prefix = (line1Ok && line2Ok) ? "; " : "; FAIL! → ";
        txtLineAns.Text += prefix + parsed.ToString();
    }

    /// <summary>
    /// Test handler for the six header lines: shows a Y/N layout verdict for the three report
    /// header lines (RH1-RH3) and the three page header lines (PH1-PH3), then appends the
    /// processing date parsed from header line 3 when that parse succeeds.
    /// </summary>
    protected void btnHead_Click(object sender, EventArgs e)
    {
        string reportHead1 = txtHeadLine1.Text;
        string reportHead2 = txtHeadLine2.Text;
        string reportHead3 = txtHeadLine3.Text;
        string pageHead1 = txtHeadLine4.Text;
        string pageHead2 = txtHeadLine5.Text;
        string pageHead3 = txtHeadLine6.Text;

        // layout verdicts, appended in report order
        txtHeadAns.Text = string.Empty;
        txtHeadAns.Text += DMR_RptParseHelper.IsMatchReportHeadLine1(reportHead1) ? "RH1 Y " : "RH1 N ";
        txtHeadAns.Text += DMR_RptParseHelper.IsMatchReportHeadLine2(reportHead2) ? "RH2 Y " : "RH2 N ";
        txtHeadAns.Text += DMR_RptParseHelper.IsMatchReportHeadLine3(reportHead3) ? "RH3 Y " : "RH3 N ";
        txtHeadAns.Text += DMR_RptParseHelper.IsMatchPageHeadLine1(pageHead1) ? "PH1 Y " : "PH1 N ";
        txtHeadAns.Text += DMR_RptParseHelper.IsMatchPageHeadLine2(pageHead2) ? "PH2 Y " : "PH2 N ";
        txtHeadAns.Text += DMR_RptParseHelper.IsMatchPageHeadLine3(pageHead3) ? "PH3 Y " : "PH3 N ";

        // data extraction from report header line 3
        DMR_RptParseHelper parsed = new DMR_RptParseHelper();
        if (parsed.TryParseReportHeaderLine3(txtHeadLine3.Text))
        {
            txtHeadAns.Text += ", PROC_DATE : " + parsed.ProcDate;
        }
    }


  8. @relyky relyky renamed this gist Nov 18, 2015. 1 changed file with 0 additions and 0 deletions.
  9. @relyky relyky renamed this gist Nov 18, 2015. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  10. @relyky relyky revised this gist Aug 6, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion RegEx CaptureNameOrTel.cs
    Original file line number Diff line number Diff line change
    @@ -5,7 +5,7 @@ public static IEnumerable<NameOrTel> CaptureNameOrTel(string loi_clause)
    {
    // 電話號碼 pattern
    string ptnTel = @"(\([HOM]\))?[0-9()+]+[0-9\-()*~# ]*[0-9]+(\-[HOM]|\([HOM]\)|\(主要\))?";
    // 人名 pattern := 中文名稱 | 英文名稱
    // 人名 pattern := 中文名稱與難字 | 英文名稱
    string ptnName = @"((\p{IsCJKUnifiedIdeographs}|[\ue000-\ue3ff])+([a-zA-z\s]+[a-zA-z\s-]*)?|[a-zA-z]+[a-zA-z\s-]*)";
    // 人名或電話 pattern
    string ptnNameOrTel = "(" + ptnName + "|" + ptnTel + ")";
  11. @relyky relyky renamed this gist Aug 6, 2015. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  12. @relyky relyky revised this gist Aug 6, 2015. No changes.
  13. @relyky relyky renamed this gist Aug 6, 2015. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  14. @relyky relyky revised this gist Aug 6, 2015. No changes.
  15. @relyky relyky revised this gist Aug 6, 2015. 2 changed files with 32 additions and 0 deletions.
    File renamed without changes.
    32 changes: 32 additions & 0 deletions QueryLoiClause.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,32 @@
    /// <summary>
    /// Extracts every "LOI:" clause from a free-text note. No regular expression is used
    /// here, but the technique is still worth keeping as a reference.
    /// A clause starts right after an "LOI:" tag and ends at the nearest of: "CBC", "DCI",
    /// "PRO", "HYBF", "客戶已辦理授權", "被授權人", the next "LOI:" tag, or the end of the text.
    /// (A comma is not used as a terminator because it is not precise enough.)
    /// </summary>
    /// <param name="notes_for_tsd">The note text to scan; null or empty yields no clauses.</param>
    /// <returns>The clause texts in order of appearance, without the "LOI:" tag.</returns>
    public static IEnumerable<string> QueryLoiClause(string notes_for_tsd)
    {
        if (string.IsNullOrEmpty(notes_for_tsd))
        {
            yield break;
        }

        const string LoiTag = "LOI:";
        string[] terminators = { "CBC", "DCI", "PRO", "HYBF", "客戶已辦理授權", "被授權人" };

        int searchFrom = 0;
        while (true)
        {
            // Ordinal search: these are fixed markers, not linguistic text.
            int tagIdx = notes_for_tsd.IndexOf(LoiTag, searchFrom, System.StringComparison.Ordinal);

            // IndexOf returns -1 for "not found"; index 0 is a valid hit (tag at the very start).
            if (tagIdx < 0)
            {
                yield break;
            }

            int clauseStart = tagIdx + LoiTag.Length;

            // The clause may run to the end of the text; any earlier terminator shortens it.
            int clauseEnd = notes_for_tsd.Length;
            foreach (string terminator in terminators)
            {
                int idx = notes_for_tsd.IndexOf(terminator, clauseStart, System.StringComparison.Ordinal);
                if (idx >= 0 && idx < clauseEnd)
                {
                    clauseEnd = idx;
                }
            }

            // Several "LOI:" clauses may follow each other directly.
            int nextTagIdx = notes_for_tsd.IndexOf(LoiTag, clauseStart, System.StringComparison.Ordinal);
            if (nextTagIdx >= 0 && nextTagIdx < clauseEnd)
            {
                clauseEnd = nextTagIdx;
            }

            yield return notes_for_tsd.Substring(clauseStart, clauseEnd - clauseStart);

            // Continue scanning where this clause ended.
            searchFrom = clauseEnd;
        }
    }
  16. @relyky relyky revised this gist Aug 6, 2015. 1 changed file with 13 additions and 13 deletions.
    26 changes: 13 additions & 13 deletions RegEx example.cs
    Original file line number Diff line number Diff line change
    @@ -1,20 +1,20 @@
    /// <summary>
    /// 再解析出:電話/非電話(即人名);解析POA_Fin7過程的中繼資料
    /// 再解析出:電話/非電話(即人名);
    /// </summary>
    public static IEnumerable<NameOrTel> CaptureNameOrTel(string loi_clause)
    {
    // 電話號碼 pattern
    string ptnTel = @"(\([HOM]\))?[0-9()+]+[0-9\-()*~# ]*[0-9]+(\-[HOM]|\([HOM]\)|\(主要\))?";
    // 名稱 pattern := 中文名稱 | 英文名稱
    string ptnName = @"((\p{IsCJKUnifiedIdeographs}|[\ue000-\ue3ff])+([a-zA-z\s]+[a-zA-z\s-]*)?|[a-zA-z]+[a-zA-z\s-]*)";
    // 名稱或電話 pattern
    string ptnNameOrTel = "(" + ptnName + "|" + ptnTel + ")";
    // 電話號碼 pattern
    string ptnTel = @"(\([HOM]\))?[0-9()+]+[0-9\-()*~# ]*[0-9]+(\-[HOM]|\([HOM]\)|\(主要\))?";
    // 人名 pattern := 中文名稱 | 英文名稱
    string ptnName = @"((\p{IsCJKUnifiedIdeographs}|[\ue000-\ue3ff])+([a-zA-z\s]+[a-zA-z\s-]*)?|[a-zA-z]+[a-zA-z\s-]*)";
    // 人名或電話 pattern
    string ptnNameOrTel = "(" + ptnName + "|" + ptnTel + ")";

    foreach (Match m in Regex.Matches(loi_clause, ptnNameOrTel))
    {
    bool isTel = Regex.IsMatch(m.Value, ptnTel);
    yield return new NameOrTel(isTel, m.Value);
    }
    foreach (Match m in Regex.Matches(loi_clause, ptnNameOrTel))
    {
    bool isTel = Regex.IsMatch(m.Value, ptnTel);
    yield return new NameOrTel(isTel, m.Value);
    }

    yield return null; // 最後一定是null,以表示演算結束。
    yield return null; // 最後一定是null,以表示演算結束。
    }
  17. @relyky relyky created this gist Aug 6, 2015.
    20 changes: 20 additions & 0 deletions RegEx example.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,20 @@
    /// <summary>
    /// Splits an LOI clause into its telephone numbers and non-telephone items (person names).
    /// Every match of the combined "name or telephone" pattern is returned in order of
    /// appearance, flagged as telephone when the matched text contains a telephone number.
    /// </summary>
    /// <param name="loi_clause">The clause text to scan.</param>
    /// <returns>
    /// The recognized items, always followed by one final null element that marks the end
    /// of the sequence; callers must be prepared for that trailing null.
    /// </returns>
    public static IEnumerable<NameOrTel> CaptureNameOrTel(string loi_clause)
    {
        // Telephone pattern: optional "(H)/(O)/(M)" prefix, digits mixed with punctuation,
        // ending in a digit, and an optional "-H", "(H)" or "(主要)" style suffix.
        string ptnTel = @"(\([HOM]\))?[0-9()+]+[0-9\-()*~# ]*[0-9]+(\-[HOM]|\([HOM]\)|\(主要\))?";

        // Name pattern := Chinese name (CJK ideographs or private-use characters U+E000-U+E3FF,
        // optionally followed by Latin text) | English name.
        // The Latin classes are [a-zA-Z]: the former "A-z" range also accepted the six
        // characters between 'Z' and 'a' in ASCII ([ \ ] ^ _ `), which are not part of a name.
        string ptnName = @"((\p{IsCJKUnifiedIdeographs}|[\ue000-\ue3ff])+([a-zA-Z\s]+[a-zA-Z\s-]*)?|[a-zA-Z]+[a-zA-Z\s-]*)";

        // Name-or-telephone pattern; the name alternative is tried first.
        string ptnNameOrTel = "(" + ptnName + "|" + ptnTel + ")";

        foreach (Match m in Regex.Matches(loi_clause, ptnNameOrTel))
        {
            bool isTel = Regex.IsMatch(m.Value, ptnTel);
            yield return new NameOrTel(isTel, m.Value);
        }

        yield return null; // the last element is always null, signalling the end of the results
    }