using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using PanGu.Dict;
using PanGu.Framework;
using PanGu.Match;
using PanGu.Setting;
using PanGu.HighLight;
using Lucene.Net.Analysis;
using System.IO;
using PanGu;
using System.Configuration;
namespace Pangu.Helper
{
public class PanguHelper
{
// Lazily created shared instance. Declared volatile because ShareHelper reads it
// once outside the lock before deciding whether to enter it.
private static volatile PanguHelper _instance = null;

// Private gate object guarding creation of the shared instance.
private static readonly object lockHelper = new object();

// Private so that instances are only obtained through ShareHelper.
private PanguHelper() { }

/// <summary>
/// Gets the shared <see cref="PanguHelper"/> instance, creating it on first access.
/// </summary>
/// <remarks>
/// On first access the "PANGU_CONFIG" application setting is read. When it is
/// non-empty, its value is passed to <see cref="Use"/> before the instance is made
/// visible to other threads. When the setting is missing or empty, no explicit
/// initialization is performed here.
/// NOTE(review): the previous comment stated that a pangu.xml next to the application
/// is used by default; this code does not load it itself, so that presumably relies
/// on PanGu's own fallback behavior. Confirm against the PanGu library.
/// If initialization throws, nothing is published and the next access retries.
/// </remarks>
public static PanguHelper ShareHelper
{
    get
    {
        if (_instance == null)
        {
            lock (lockHelper)
            {
                if (_instance == null)
                {
                    PanguHelper helper = new PanguHelper();
                    string configFile = ConfigurationManager.AppSettings["PANGU_CONFIG"] ?? string.Empty;
                    if (configFile.Length != 0)
                    {
                        helper.Use(configFile);
                    }

                    // Publish only after initialization has finished, so a caller that
                    // passes the unlocked null check never sees a half-initialized helper.
                    _instance = helper;
                }
            }
        }

        return _instance;
    }
}
/// <summary>
/// Initializes the PanGu segmenter by passing the given configuration file to
/// <c>Segment.Init</c>.
/// </summary>
/// <param name="configFile">Path of the PanGu configuration file.</param>
public void Use(string configFile)
{
    // Segment resolves to PanGu.Segment through the file-level using directive.
    Segment.Init(configFile);
}
/// <summary>
/// Returns the Chinese display label for a part-of-speech value.
/// </summary>
/// <param name="pos">Part-of-speech enumeration value.</param>
/// <returns>
/// The label for <paramref name="pos"/>; "未知词性" (unknown part of speech) for
/// <c>POS.POS_UNK</c> and for any value not listed below.
/// </returns>
public string GetPosDescript(POS pos)
{
    // The returned strings are runtime text and are kept exactly as they were;
    // the trailing comments are English glosses for readers only.
    switch (pos)
    {
        case POS.POS_D_K: return "后接成分";          // suffix component
        case POS.POS_D_H: return "前接成分";          // prefix component
        case POS.POS_A_NZ: return "其他专名";         // other proper noun
        case POS.POS_A_NX: return "外文字符";         // foreign characters
        case POS.POS_A_NR: return "人名";             // person name
        case POS.POS_D_Z: return "状态词";            // status word
        case POS.POS_A_NT: return "机构团体";         // organization
        case POS.POS_A_NS: return "地名";             // place name
        case POS.POS_D_Y: return "语气词 语气语素";   // modal particle
        case POS.POS_D_X: return "非语素字";          // non-morpheme character
        case POS.POS_D_W: return "标点符号";          // punctuation
        case POS.POS_D_T: return "时间词";            // time word
        case POS.POS_D_S: return "处所词";            // locative word
        case POS.POS_D_V: return "动词 动语素";       // verb
        case POS.POS_D_U: return "助词 助语素";       // auxiliary / particle
        case POS.POS_D_R: return "代词 代语素";       // pronoun
        case POS.POS_A_Q: return "量词 量语素";       // measure word
        case POS.POS_D_P: return "介词";              // preposition
        case POS.POS_D_MQ: return "数量词";           // numeral-measure compound
        case POS.POS_A_M: return "数词 数语素";       // numeral
        case POS.POS_D_O: return "拟声词";            // onomatopoeia
        case POS.POS_D_N: return "名词 名语素";       // noun
        case POS.POS_D_F: return "方位词 方位语素";   // direction / position word
        case POS.POS_D_E: return "叹词 叹语素";       // interjection
        case POS.POS_D_L: return "习语";              // idiom
        case POS.POS_D_I: return "成语";              // set phrase (chengyu)
        case POS.POS_D_D: return "副词 副语素";       // adverb
        case POS.POS_D_C: return "连词 连语素";       // conjunction
        case POS.POS_D_B: return "区别词 区别语素";   // distinguishing word
        case POS.POS_D_A: return "形容词 形语素";     // adjective
        case POS.POS_UNK:
        default:
            return "未知词性";                        // unknown part of speech
    }
}
/// <summary>
/// Segments <paramref name="content"/> and returns the raw <see cref="WordInfo"/> items.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="matchOptions">Match options forwarded to the segmenter; null by default.</param>
/// <param name="matchParameter">Match parameters forwarded to the segmenter; null by default.</param>
/// <returns>The collection produced by <c>Segment.DoSegment</c>, returned as is.</returns>
public ICollection<WordInfo> GetSegmentWords(string content, MatchOptions matchOptions = null,
    MatchParameter matchParameter = null)
{
    // A fresh Segment is created for every call.
    return new Segment().DoSegment(content, matchOptions, matchParameter);
}
/// <summary>
/// Segments <paramref name="content"/> and returns the text of every resulting word.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="matchOptions">Match options forwarded to the segmenter; null by default.</param>
/// <param name="matchParameter">Match parameters forwarded to the segmenter; null by default.</param>
/// <returns>The <c>Word</c> of each segmented item, in the order the segmenter produced them.</returns>
public List<string> GetSplitWords(string content, MatchOptions matchOptions = null, MatchParameter matchParameter = null)
{
    var result = new List<string>();

    foreach (var info in new Segment().DoSegment(content, matchOptions, matchParameter))
    {
        result.Add(info.Word);
    }

    return result;
}
/// <summary>
/// Segments <paramref name="content"/> and returns only the words whose frequency
/// equals <paramref name="frequency"/>.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="frequency">Frequency a word must have to be included; compared with exact equality.</param>
/// <param name="matchOptions">Match options forwarded to the segmenter; null by default.</param>
/// <param name="matchParameter">Match parameters forwarded to the segmenter; null by default.</param>
/// <returns>The <c>Word</c> of each matching item, in segmenter order.</returns>
public List<string> GetSplitWordsByFrequency(string content, double frequency, MatchOptions matchOptions = null,
    MatchParameter matchParameter = null)
{
    var matches = new List<string>();

    foreach (var info in new Segment().DoSegment(content, matchOptions, matchParameter))
    {
        // Exact comparison, no tolerance.
        if (info.Frequency == frequency)
        {
            matches.Add(info.Word);
        }
    }

    return matches;
}
/// <summary>
/// Segments <paramref name="content"/> and returns only the words whose rank
/// equals <paramref name="rank"/>.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="rank">Rank (weight) a word must have to be included.</param>
/// <param name="matchOptions">Match options forwarded to the segmenter; null by default.</param>
/// <param name="matchParameter">Match parameters forwarded to the segmenter; null by default.</param>
/// <returns>The <c>Word</c> of each matching item, in segmenter order.</returns>
public List<string> GetSplitWordsByRank(string content, int rank, MatchOptions matchOptions = null,
    MatchParameter matchParameter = null)
{
    var matches = new List<string>();

    foreach (var info in new Segment().DoSegment(content, matchOptions, matchParameter))
    {
        if (info.Rank == rank)
        {
            matches.Add(info.Word);
        }
    }

    return matches;
}
/// <summary>
/// Highlights <paramref name="keyword"/> inside <paramref name="content"/>.
/// </summary>
/// <param name="keyword">String to highlight.</param>
/// <param name="content">Text in which to highlight the keyword.</param>
/// <param name="fragmentSize">Number of characters per summary fragment.</param>
/// <returns>
/// The best fragment reported by the highlighter, or <paramref name="content"/>
/// unchanged when that fragment is null or empty.
/// </returns>
public static string HighLight(string keyword, string content, int fragmentSize)
{
    // Highlighted terms are wrapped in a red <font> element.
    var formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");

    // The highlighter is given the formatter and a fresh PanGu Segment.
    var highlighter = new Highlighter(formatter, new Segment())
    {
        FragmentSize = fragmentSize
    };

    string fragment = highlighter.GetBestFragment(keyword, content);
    return string.IsNullOrEmpty(fragment) ? content : fragment;
}
}
}
|