Quentin Gallouédec committed on
Commit
832915b
·
1 Parent(s): ee53a4b
Files changed (2) hide show
  1. app.py +22 -9
  2. my_lib.py +1 -0
app.py CHANGED
@@ -15,34 +15,47 @@ class WordleEnv:
15
  self._max = max_guesses
16
 
17
  def reset(self) -> str:
 
18
  self._secret = random.choice(list(self.dictionary))
19
  self._n = 0
20
  self._obs = "⬜" * 4
21
  return self._obs
22
 
23
  def step(self, action: str) -> tuple[str, float, bool]:
 
 
 
 
 
 
 
 
24
  guess: str = str(action)
25
  guess = guess.strip().lower()
26
 
27
  if len(guess) != 4 or not guess.isalpha():
28
- raise ValueError("Action must be a 4-letter lowercase word.")
29
-
30
- if self._n >= self._max:
31
- raise RuntimeError("Episode is done. Call reset() to start a new episode.")
32
 
33
  self._n += 1
34
- secret = self._secret
 
35
  feedback: list[str] = []
36
  for i, ch in enumerate(guess):
37
- if ch == secret[i]:
38
  feedback.append("🟩")
39
- elif ch in secret:
40
  feedback.append("🟨")
41
  else:
42
  feedback.append("⬜")
43
  self._obs = "".join(feedback)
44
- done = guess == secret or self._n >= self._max
45
- reward = 1.0 if guess == secret else 0.0
 
 
 
 
 
 
46
  return self._obs, reward, done
47
 
48
 
 
15
  self._max = max_guesses
16
 
17
  def reset(self) -> str:
18
+ """Reset the environment and return the initial observation."""
19
  self._secret = random.choice(list(self.dictionary))
20
  self._n = 0
21
  self._obs = "⬜" * 4
22
  return self._obs
23
 
24
  def step(self, action: str) -> tuple[str, float, bool]:
25
+ """
26
+ Take an action (a 4-letter word) and return (observation, reward, done).
27
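+ If the guess is not a 4-letter alphabetic word, an error message is returned with a reward of -1.0, the guess is not counted, and the episode continues.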
28
+ When done is True, the episode has ended and reset() should be called to start a new episode.
29
+ """
30
+ if self._n >= self._max:
31
+ return "The game is over. Please reset.", -1.0, True
32
+
33
  guess: str = str(action)
34
  guess = guess.strip().lower()
35
 
36
  if len(guess) != 4 or not guess.isalpha():
37
+ return "Invalid guess. Must be a 4-letter word.", -1.0, False
 
 
 
38
 
39
  self._n += 1
40
+
41
+ # Compute feedback
42
  feedback: list[str] = []
43
  for i, ch in enumerate(guess):
44
+ if ch == self._secret[i]:
45
  feedback.append("🟩")
46
+ elif ch in self._secret:
47
  feedback.append("🟨")
48
  else:
49
  feedback.append("⬜")
50
  self._obs = "".join(feedback)
51
+
52
+ # Check for success or timeout and compute reward
53
+ success = guess == self._secret
54
+ timeout = self._n >= self._max
55
+ done = success or timeout
56
+ reward = 1.0 if success else 0.0
57
+ if done and not success:
58
+ self._obs += f" Game over. The word was '{self._secret}'."
59
  return self._obs, reward, done
60
 
61
 
my_lib.py CHANGED
@@ -42,6 +42,7 @@ def get_demo(env_cls: type) -> gr.Blocks:
42
  sessions = {} # just a dict now
43
 
44
  def init_env() -> str:
 
45
  session_id = str(uuid.uuid4())
46
  env = env_cls()
47
  sessions[session_id] = env
 
42
  sessions = {} # just a dict now
43
 
44
  def init_env() -> str:
45
+ """Initialize a new environment instance and return a session ID."""
46
  session_id = str(uuid.uuid4())
47
  env = env_cls()
48
  sessions[session_id] = env