Download Work - 840 -2024- Bengla -www.mazabd.click... Site

def entropy(s: str) -> float:
    """Return the Shannon entropy, in bits per byte, of a string.

    The string is encoded with ``str.encode()`` (UTF-8) and the entropy is
    computed over the distribution of the resulting byte values.

    Args:
        s: The text to measure.

    Returns:
        The entropy as a non-negative float. An empty string has entropy 0.0.
    """
    data = s.encode()
    if not data:
        # No symbols at all: define the entropy as 0 instead of dividing by 0.
        return 0.0
    counts = np.bincount(np.frombuffer(data, dtype=np.uint8))
    # Normalise by the number of *bytes* (not characters) so the probabilities
    # sum to 1 even when the string contains multi-byte characters.
    probs = counts[counts > 0] / len(data)
    # Subtract from 0.0 rather than negating, so a single-symbol string
    # yields 0.0 and not -0.0.
    return 0.0 - float(np.sum(probs * np.log2(probs)))

def extract_features(subject: str) -> dict: # ---- Basic tokenisation ------------------------------------------------- tokens = re.split(r'\s+', subject.strip()) n_tokens = len(tokens) n_chars = len(subject) Download WORK - 840 -2024- Bengla -www.mazabd.click...

# ---- Build dict --------------------------------------------------------- return { "n_tokens": n_tokens, "n_chars": n_chars, "avg_token_len": avg_token_len, "upper_ratio": upper_ratio, "digit_ratio": digit_ratio, "stop_ratio": stop_ratio, "has_action_verb": int(has_action), "has_suspicious_kw": int(has_suspicious), "hyphen_cnt": hyphen_cnt, "ellipsis": int(ellipsis), "numeric_pattern": int(numeric_pattern), "domain_present": int(bool(domain)), "registered_domain": registered, "tld": tld, "subdomain_cnt": subdomain_cnt, def entropy(s): """Shannon entropy of a string

# Dummy placeholders for reputation / age (replace with real API calls) domain_age_days = 9999 # e.g., today - creation_date domain_risk = 0 # 0 = clean, 1 = flagged Download WORK - 840 -2024- Bengla -www.mazabd.click...

# ---- URL / domain cues -------------------------------------------------- # Grab anything that looks like a domain (very permissive) domain_match = re.search(r'([a-z0-9-]+\.)+[a-z]{2,}', subject, re.I) domain = domain_match.group(0) if domain_match else '' ext = tldextract.extract(domain) registered = f"{ext.domain}.{ext.suffix}" if ext.suffix else '' tld = ext.suffix or '' subdomain_cnt = domain.count('.') - 1 if domain else 0 hyphen_in_domain = '-' in ext.domain