tashaphyne

package module
v0.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 6, 2025 License: MIT Imports: 5 Imported by: 0

README

tashaphyne

A Go port of the Tashaphyne Arabic Light Stemmer.

Documentation

Index

Constants

View Source
// Arabic-script characters exposed as string constants written with \u
// escapes. Every constant holds exactly one code point, except the four
// Simple* constants at the end of the block, which hold two.
const (
	// Punctuation, letters and combining marks in the U+060C–U+0655 range.
	Comma          = "\u060C"
	Semicolon      = "\u061B"
	Question       = "\u061F"
	Hamza          = "\u0621"
	AlefMadda      = "\u0622"
	AlefHamzaAbove = "\u0623"
	WawHamza       = "\u0624"
	AlefHamzaBelow = "\u0625"
	YehHamza       = "\u0626"
	Alef           = "\u0627"
	Beh            = "\u0628"
	TehMarbuta     = "\u0629"
	Teh            = "\u062a"
	Theh           = "\u062b"
	Jeem           = "\u062c"
	Hah            = "\u062d"
	Khah           = "\u062e"
	Dal            = "\u062f"
	Thal           = "\u0630"
	Reh            = "\u0631"
	Zain           = "\u0632"
	Seen           = "\u0633"
	Sheen          = "\u0634"
	Sad            = "\u0635"
	Dad            = "\u0636"
	Tah            = "\u0637"
	Zah            = "\u0638"
	Ain            = "\u0639"
	Ghain          = "\u063a"
	Tatweel        = "\u0640"
	Feh            = "\u0641"
	Qaf            = "\u0642"
	Kaf            = "\u0643"
	Lam            = "\u0644"
	Meem           = "\u0645"
	Noon           = "\u0646"
	Heh            = "\u0647"
	Waw            = "\u0648"
	AlefMaksura    = "\u0649"
	Yeh            = "\u064a"
	MaddaAbove     = "\u0653"
	HamzaAbove     = "\u0654"
	HamzaBelow     = "\u0655"
	// Digits Zero..Nine occupy the contiguous range U+0660–U+0669.
	Zero           = "\u0660"
	One            = "\u0661"
	Two            = "\u0662"
	Three          = "\u0663"
	Four           = "\u0664"
	Five           = "\u0665"
	Six            = "\u0666"
	Seven          = "\u0667"
	Eight          = "\u0668"
	Nine           = "\u0669"
	Percent        = "\u066a"
	Decimal        = "\u066b"
	Thousands      = "\u066c"
	Star           = "\u066d"
	MiniAlef       = "\u0670"
	AlefWasla      = "\u0671"
	FullStop       = "\u06d4"
	// ByteOrderMark is U+FEFF, the only non-ligature constant here that lies
	// outside the U+06xx range.
	ByteOrderMark  = "\ufeff"

	// Diacritics: eight consecutive code points, U+064B–U+0652.
	Fathatan = "\u064b"
	Dammatan = "\u064c"
	Kasratan = "\u064d"
	Fatha    = "\u064e"
	Damma    = "\u064f"
	Kasra    = "\u0650"
	Shadda   = "\u0651"
	Sukun    = "\u0652"

	// Ligatures: each LamAlef* constant is a single code point in the
	// U+FEF5–U+FEFB range. Each Simple* constant is the two-code-point
	// sequence Lam (U+0644) followed by the correspondingly named alef
	// variant (Alef, AlefHamzaAbove, AlefHamzaBelow, AlefMadda).
	// NOTE(review): presumably the ligature forms are rewritten to the Simple*
	// forms during normalization; the code doing so is not visible here.
	LamAlef                 = "\ufefb"
	LamAlefHamzaAbove       = "\ufef7"
	LamAlefHamzaBelow       = "\ufef9"
	LamAlefMaddaAbove       = "\ufef5"
	SimpleLamAlef           = "\u0644\u0627"
	SimpleLamAlefHamzaAbove = "\u0644\u0623"
	SimpleLamAlefHamzaBelow = "\u0644\u0625"
	SimpleLamAlefMaddaAbove = "\u0644\u0622"
)
View Source
// Package-level defaults plus the two word-class labels.
// NOTE(review): the code that consumes these values is not visible here; the
// per-constant notes below are inferred from the names and should be
// confirmed against the stemmer implementation.
const (
	// Letter sets, stored as plain strings of Arabic letters. Presumably the
	// letters that may occur in a prefix, a suffix and an infix respectively.
	DefaultPrefixLetters = "مأسفلونيتاكب"
	DefaultSuffixLetters = "امتةكنهوي"
	DefaultInfixLetters  = "اتويطد"
	// Untyped integer limits. Presumably maximum affix lengths and minimum
	// stem length; the unit (runes vs. bytes) cannot be told from here.
	DefaultMaxPrefix     = 6
	DefaultMaxSuffix     = 5
	DefaultMinStem       = 3
	// DefaultJoker is the single-character string "*".
	DefaultJoker         = "*"
	// Verb and Noun are lowercase string labels for the two word classes.
	Verb                 = "verb"
	Noun                 = "noun"
)

Variables

View Source
// Combined verb+noun lists. Each one is built during package initialization
// by appending the noun list to the verb list, so all verb entries come
// first, followed by all noun entries.
//
// append does not deduplicate: an entry present in both inputs appears twice
// in the result (VerbPrefixList and NounPrefixList, for example, both start
// with the empty string "").
//
// NOTE(review): append reuses the first operand's backing array when it has
// spare capacity. The verb lists are rendered as slice literals (len == cap),
// so a fresh array should be allocated here; confirm they stay literals.
var (
	AffixList          = append(VerbAffixList, NounAffixList...)
	StemmingPrefixList = append(VerbPrefixList, NounPrefixList...)
	StemmingSuffixList = append(VerbSuffixList, NounSuffixList...)
)
View Source
// DefaultPrefixList and DefaultSuffixList are package-level []string tables.
// Their contents are elided in this rendering (242 and 294 elements).
// NOTE(review): presumably the default affix tables of a new Stemmer — confirm.
var (
	DefaultPrefixList = []string{} /* 242 elements not displayed */
	DefaultSuffixList = []string{} /* 294 elements not displayed */
)
View Source
// NounAffixList is a []string table whose 2177 elements are elided in this
// rendering. It is the second operand of the append that builds AffixList.
var NounAffixList = []string{}/* 2177 elements not displayed */
View Source
// NounPrefixList is a table of 48 prefix strings for nouns. It is the second
// operand of the append that builds StemmingPrefixList.
//
// The first entry is the empty string; the remaining entries are listed in
// ascending code point order.
// NOTE(review): presumably "" stands for "no prefix"; whether any code relies
// on the ordering cannot be told from here.
var NounPrefixList = []string{
	"",
	"آل",
	"أ",
	"أب",
	"أبال",
	"أف",
	"أفال",
	"أفب",
	"أفبال",
	"أفك",
	"أفكال",
	"أفل",
	"أفلل",
	"أك",
	"أكال",
	"أل",
	"ألل",
	"أو",
	"أوال",
	"أوب",
	"أوبال",
	"أوك",
	"أوكال",
	"أول",
	"أولل",
	"ال",
	"ب",
	"بال",
	"ف",
	"فال",
	"فب",
	"فبال",
	"فك",
	"فكال",
	"فل",
	"فلل",
	"ك",
	"كال",
	"ل",
	"لل",
	"و",
	"وال",
	"وب",
	"وبال",
	"وك",
	"وكال",
	"ول",
	"ولل",
}
View Source
// NounSuffixList is a []string table whose 102 elements are elided in this
// rendering. It is the second operand of the append that builds
// StemmingSuffixList.
var NounSuffixList = []string{}/* 102 elements not displayed */
View Source
// Roots is a []string table whose 7504 elements are elided in this rendering.
// NOTE(review): presumably a list of known Arabic roots — confirm against
// the source data.
var Roots = []string{} /* 7504 elements not displayed */
View Source
// StopWords maps a string key to a StopWord record; its 10339 entries are
// elided in this rendering.
// NOTE(review): which form of the word is used as the key (vocalized or not)
// cannot be told from here — confirm.
var StopWords = map[string]StopWord{}/* 10339 elements not displayed */
View Source
// VerbAffixList is a []string table whose 3879 elements are elided in this
// rendering. It is the first operand of the append that builds AffixList.
var VerbAffixList = []string{}/* 3879 elements not displayed */
View Source
// VerbPrefixList is a table of 75 prefix strings for verbs. It is the first
// operand of the append that builds StemmingPrefixList.
//
// The first entry is the empty string; the remaining entries are listed in
// ascending code point order.
// NOTE(review): presumably "" stands for "no prefix"; whether any code relies
// on the ordering cannot be told from here.
var VerbPrefixList = []string{
	"",
	"أ",
	"أأ",
	"أت",
	"أسأ",
	"أست",
	"أسن",
	"أسي",
	"أف",
	"أفأ",
	"أفت",
	"أفسأ",
	"أفست",
	"أفسن",
	"أفسي",
	"أفن",
	"أفي",
	"أن",
	"أو",
	"أوأ",
	"أوت",
	"أوسأ",
	"أوست",
	"أوسن",
	"أوسي",
	"أولأ",
	"أولت",
	"أولن",
	"أولي",
	"أون",
	"أوي",
	"أي",
	"ا",
	"ت",
	"سأ",
	"ست",
	"سن",
	"سي",
	"ف",
	"فأ",
	"فا",
	"فت",
	"فسأ",
	"فست",
	"فسن",
	"فسي",
	"فل",
	"فلأ",
	"فلت",
	"فلن",
	"فلي",
	"فن",
	"في",
	"ل",
	"لأ",
	"لت",
	"لن",
	"لي",
	"ن",
	"و",
	"وأ",
	"وا",
	"وت",
	"وسأ",
	"وست",
	"وسن",
	"وسي",
	"ول",
	"ولأ",
	"ولت",
	"ولن",
	"ولي",
	"ون",
	"وي",
	"ي",
}
View Source
// VerbSuffixList is a []string table whose 160 elements are elided in this
// rendering. It is the first operand of the append that builds
// StemmingSuffixList.
var VerbSuffixList = []string{}/* 160 elements not displayed */
View Source
// Verbs is a string set (map with empty-struct values). It is declared
// without an initializer, so it is nil until some code assigns it; reads on
// a nil map are safe but writes panic.
// NOTE(review): the code that populates it is not visible here.
var Verbs map[string]struct{}

Functions

func In

func In[T comparable](needle T, haystack ...T) bool

func IsStop

func IsStop(s string) bool

func IsVerbStamp

func IsVerbStamp(s string) bool

func Max

func Max[T cmp.Ordered](values ...T) (m T)

func Min

func Min[T cmp.Ordered](values ...T) (m T)

func NormalizeHamza

func NormalizeHamza(s string) string

func NormalizeLamAlef

func NormalizeLamAlef(s string) string

func NormalizeSearchText

func NormalizeSearchText(s string) string

func NormalizeSpellErrors

func NormalizeSpellErrors(s string) string

func StopStem

func StopStem(s string) string

func StripTashkeel

func StripTashkeel(s string) string

func StripTatweel

func StripTatweel(s string) string

func VerbStamp

func VerbStamp(s Runes) string

Types

type Runes

// Runes is a named slice of rune. The methods listed for it in this package
// take string arguments and return Runes, string, int or bool.
// NOTE(review): positions are presumably rune indices, not byte offsets —
// confirm in the method bodies.
type Runes []rune

func (Runes) Append

func (r Runes) Append(s string) Runes

func (Runes) At

func (r Runes) At(pos int) string

func (Runes) Contains

func (r Runes) Contains(sub string) bool

func (Runes) HasPrefix

func (r Runes) HasPrefix(prefix string) bool

func (Runes) HasSuffix

func (r Runes) HasSuffix(suffix string) bool

func (Runes) In

func (r Runes) In(values ...string) bool

func (Runes) IndexOf

func (r Runes) IndexOf(s string) int

func (Runes) LastIndexOf

func (r Runes) LastIndexOf(s string) int

func (Runes) Prepend

func (r Runes) Prepend(s string) Runes

func (Runes) Replace

func (r Runes) Replace(old, new string) Runes

func (Runes) ReplaceRegex

func (r Runes) ReplaceRegex(re *regexp.Regexp, new string) Runes

func (Runes) Slice

func (r Runes) Slice(from, to int) Runes

func (Runes) SliceFrom

func (r Runes) SliceFrom(from int) Runes

func (Runes) SliceTo

func (r Runes) SliceTo(to int) Runes

func (Runes) String

func (r Runes) String() string

func (Runes) StripTashkeel

func (r Runes) StripTashkeel() Runes

type Stemmer

// Stemmer has no exported fields in this rendering. The package declares New,
// which returns a *Stemmer, and the method LightStem, which maps a word
// string to a string.
type Stemmer struct {
	// contains filtered or unexported fields
}

func New

func New() *Stemmer

func (*Stemmer) LightStem

func (s *Stemmer) LightStem(word string) string

type StopWord

// StopWord is the value type of the StopWords map. Every field is a string,
// and every JSON tag carries omitempty, so empty fields are left out when a
// value is marshalled.
//
// The JSON keys are the lowercase field names, including the spellings
// "procletic" and "encletic"; they are part of the serialized format.
// NOTE(review): the meaning of the individual fields is not shown here and
// should be confirmed against the stop-word source data.
type StopWord struct {
	Word      string `json:"word,omitempty"`
	Procletic string `json:"procletic,omitempty"`
	Tags      string `json:"tags,omitempty"`
	Vocalized string `json:"vocalized,omitempty"`
	Stem      string `json:"stem,omitempty"`
	Type      string `json:"type,omitempty"`
	Original  string `json:"original,omitempty"`
	Encletic  string `json:"encletic,omitempty"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL