龙盟编程博客 | 无障碍搜索 | 云盘搜索神器
快速搜索
主页 > 软件开发 > C/C++开发 >

马尔可夫链算法(markov算法)的awk、C++、C语言实现代码(2)

时间:2014-08-21 02:34来源:网络整理 作者:网络 点击:
分享到:
如果需要程序运行得足够快,那就只能用较低级的语言来实现了。当我们用c 语言来实现时,就不得不考虑各种各样的问题了。首先,面临的第一个问题就

如果需要程序运行得足够快,那就只能用较低级的语言来实现了。当我们用c 语言来实现时,就不得不考虑各种各样的问题了。首先,面临的第一个问题就是,如何表示前缀和后缀的关系?

这里采用前缀的key,后缀为value 的方式存储前缀与后缀的关系,我们知道,hash表的查找速度最快,所以,这里采用hash表也是情理之中的事,只是看你能不能想到,用前缀作key,基于上面的思路,再仔细一点,就没有什么大问题了。

/* Copyright (C) 1999 Lucent Technologies */
/* Excerpted from 'The Practice of Programming' */
/* by Brian W. Kernighan and Rob Pike */

/*
 * Markov chain random text generator.
 */

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "eprintf.h"

enum {
  NPREF  = 2,  /* number of prefix words */
  NHASH  = 4093, /* size of state hash table array */
  MAXGEN = 10000 /* maximum words generated */
};

typedef struct State State;
typedef struct Suffix Suffix;

struct State { /* prefix + suffix list */
  char  *pref[NPREF];  /* prefix words */
  Suffix *suf;      /* list of suffixes */
  State  *next;     /* next in hash table */
};

struct Suffix { /* list of suffixes */
  char  *word;     /* suffix */
  Suffix *next;     /* next in list of suffixes */
};

State  *lookup(char *prefix[], int create);
void  build(char *prefix[], FILE*);
void  generate(int nwords);
void  add(char *prefix[], char *word);

State  *statetab[NHASH];  /* hash table of states */

char NONWORD[] = "\n"; /* cannot appear as real word */

/* markov main: markov-chain random text generation */
int main(void)
{
  int i, nwords = MAXGEN;
  char *prefix[NPREF];    /* current input prefix */

  int c;
  long seed;

  setprogname("markov");
  seed = time(NULL);

  srand(seed);
  for (i = 0; i < NPREF; i++) /* set up initial prefix */
    prefix[i] = NONWORD;
  build(prefix, stdin);
  add(prefix, NONWORD);
  generate(nwords);
  return 0;
}  

const int MULTIPLIER = 31; /* for hash() */

/* hash: compute hash value for array of NPREF strings */
unsigned int hash(char *s[NPREF])
{
  unsigned int h;
  unsigned char *p;
  int i;

  h = 0;
  for (i = 0; i < NPREF; i++)
    for (p = (unsigned char *) s[i]; *p != '\0'; p++)
      h = MULTIPLIER * h + *p;
  return h % NHASH;
}

/* lookup: search for prefix; create if requested. */
/* returns pointer if present or created; NULL if not. */
/* creation doesn't strdup so strings mustn't change later. */
State* lookup(char *prefix[NPREF], int create)
{
  int i, h;
  State *sp;

  h = hash(prefix);
  for (sp = statetab[h]; sp != NULL; sp = sp->next) {
    for (i = 0; i < NPREF; i++)
      if (strcmp(prefix[i], sp->pref[i]) != 0)
        break;
    if (i == NPREF)   /* found it */
      return sp;
  }
  if (create) {
    sp = (State *) emalloc(sizeof(State));
    for (i = 0; i < NPREF; i++)
      sp->pref[i] = prefix[i];
    sp->suf = NULL;
    sp->next = statetab[h];
    statetab[h] = sp;
  }
  return sp;
}

/* addsuffix: add to state. suffix must not change later */
void addsuffix(State *sp, char *suffix)
{
  Suffix *suf;

  suf = (Suffix *) emalloc(sizeof(Suffix));
  suf->word = suffix;
  suf->next = sp->suf;
  sp->suf = suf;
}

/* add: add word to suffix list, update prefix */
void add(char *prefix[NPREF], char *suffix)
{
  State *sp;

  sp = lookup(prefix, 1); /* create if not found */
  addsuffix(sp, suffix);
  /* move the words down the prefix */
  memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
  prefix[NPREF-1] = suffix;
}

/* build: read input, build prefix table */
void build(char *prefix[NPREF], FILE *f)
{
  char buf[100], fmt[10];

  /* create a format string; %s could overflow buf */
  sprintf(fmt, "%%%ds", sizeof(buf)-1);
  while (fscanf(f, fmt, buf) != EOF)
    add(prefix, estrdup(buf));
}

/* generate: produce output, one word per line */
void generate(int nwords)
{
  State *sp;
  Suffix *suf;
  char *prefix[NPREF], *w;
  int i, nmatch;

  for (i = 0; i < NPREF; i++) /* reset initial prefix */
    prefix[i] = NONWORD;

  for (i = 0; i < nwords; i++) {
    sp = lookup(prefix, 0);
    if (sp == NULL)
      eprintf("internal error: lookup failed");
    nmatch = 0;
    for (suf = sp->suf; suf != NULL; suf = suf->next)
      if (rand() % ++nmatch == 0) /* prob = 1/nmatch */
        w = suf->word;
    if (nmatch == 0)
      eprintf("internal error: no suffix %d %s", i, prefix[0]);
    if (strcmp(w, NONWORD) == 0)
      break;
    printf("%s\n", w);
    memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
    prefix[NPREF-1] = w;
  }
}
精彩图集

赞助商链接